shhh <- suppressPackageStartupMessages # It's a library, so shhh!
shhh(library( mgcv ))
shhh(library(dplyr))
shhh(library(ggplot2))
shhh(library(lme4))
shhh(library(tidymv))
shhh(library(gamlss))
shhh(library(gsubfn))
shhh(library(lmerTest))
shhh(library(tidyverse))
shhh(library(boot))
shhh(library(rsample))
shhh(library(plotrix))
shhh(library(ggrepel))
shhh(library(mgcv))
shhh(library(brms))
shhh(library(bayesplot))
shhh(library(patchwork))
shhh(library(MASS))
shhh(library(tidyr))
shhh(library(extraDistr))
shhh(library(purrr))
# For exercises with Stan code
shhh(library(rstan))
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = FALSE)
library(car)
library(coda)
shhh(library(gridExtra))
theme_set(theme_bw())
options(digits=4)
options(scipen=999)
set.seed(444)
pipe_message = function(.data, status) {message(status); .data}
Read in MoTR Data
rate = 160
file_prefix = "../data/provo_f160/"
fnames = list.files(path=file_prefix)
df = data.frame()
for (f in fnames) {
temp = read.csv(paste0(file_prefix, "/", f)) %>%
mutate(subj = str_remove(f, "_reading_measures.csv"))
df = rbind(df, temp)
}
# Filter out readers who don't answer the comprehension questions correctly
filter_df = df %>%
group_by(para_nr, subj) %>% summarise(correct = if_else(unique(correctness) == 1, 1, 0)) %>% ungroup() %>%
drop_na() %>%
group_by(subj) %>% summarise(p_correct = mean(correct)) %>% ungroup() %>%
mutate(p_correct = round(p_correct, digits = 2))
`summarise()` has grouped output by 'para_nr'. You can override using the `.groups` argument.
filter_df = filter_df %>% filter(p_correct < 0.8)
filter_list = filter_df$subj
## reader_3:0.70, reader_60:0.79, reader_76:0.72 , reader_256:0.71 , reader_262:0.57
raw_df = df %>%
filter(! subj %in% c(filter_list)) %>%
mutate(word = str_trim(word)) %>%
mutate(subj = str_remove(subj, "reader_")) %>%
mutate(subj = as.character(subj)) %>%
# filter(! subj %in% c("3", "60", "76", "256", "262")) %>% # Explanation for this filtering
mutate(FPReg = if_else(total_duration == 0, -1, FPReg)) %>% #If the word is skipped we can't say that it wasn't regressed on the first pass. Set to a "NA"
dplyr::select(expr_id, cond_id, para_nr, word, word_nr, first_duration, total_duration, gaze_duration, go_past_time, FPReg, subj)
length(unique(raw_df$subj))
[1] 46
df %>%
filter(! subj %in% c(filter_list)) %>%
filter(FPReg >= 0) %>%
dplyr::select(FPReg) %>%
drop_na() %>%
summarise( m = mean(FPReg))
df %>%
filter(! subj %in% c(filter_list)) %>%
dplyr::select(FPFix) %>%
drop_na() %>%
summarise( m = mean(FPFix))
NA
NA
# Average across subjects
motr_agg_df = raw_df %>%
gather(metric, value, 6:10) %>%
filter(value >= 0) %>% #Removes the "NA" values for FPReg
# ==== Remove skipped words
# mutate(zero = if_else(metric != "FPReg" & value == 0,T, F)) %>%
# filter(zero == F) %>%
drop_na() %>%
group_by(para_nr, word_nr, word, metric) %>%
mutate(outlier = if_else(metric != "FPReg" & value > (mean(value) + 3 * sd(value)), T, F)) %>% filter(outlier == F) %>%
summarise(value = mean(value), nsubj = length(unique(subj))) %>%
ungroup() %>%
arrange(para_nr, word_nr) %>%
rename(text_id = para_nr, word_text_idx = word_nr, motr_value = value)
`summarise()` has grouped output by 'para_nr', 'word_nr', 'word'. You can override using the `.groups` argument.
# View(motr_agg_df)
# write.csv(motr_agg_df, file = "/Users/cui/Desktop/MoTR/pipeline/ancillary_data/motr_agg_df.csv", row.names = FALSE)
Comparison to Provo
# Read in Provo surprisal, frequency and length data
provo_modeling_df = read.csv("../data/provo_stats.csv") %>%
dplyr::select(text_id, sent_id, trigger_idx, word, freq, surp, len) %>%
rename(word_idx = trigger_idx)
provo_modeling_df
NA
# Read in Provo eyetracking data
provo_raw_df = read.csv("../data/provo_eyetracking.csv")
# unique(provo_raw_df$Participant_ID)
# length(unique(provo_raw_df$Participant_ID))
provo_eyetracking_df = provo_raw_df %>%
dplyr::select(Participant_ID, Text_ID, Sentence_Number, Word_In_Sentence_Number, Word, Word_Number, IA_FIRST_FIX_PROGRESSIVE, IA_FIRST_RUN_DWELL_TIME, IA_DWELL_TIME, IA_REGRESSION_PATH_DURATION, IA_REGRESSION_OUT, IA_SKIP) %>%
rename( #first_duration = IA_FIRST_FIXATION_DURATION,
gaze_duration = IA_FIRST_RUN_DWELL_TIME,
total_duration = IA_DWELL_TIME,
go_past_time = IA_REGRESSION_PATH_DURATION,
subj = Participant_ID,
text_id = Text_ID,
sent_id = Sentence_Number,
word_idx = Word_In_Sentence_Number,
word_text_idx = Word_Number, # IA_ID?
word = Word, # Word?
FPReg = IA_REGRESSION_OUT,
skip = IA_SKIP,
ff_progressive = IA_FIRST_FIX_PROGRESSIVE) %>%
mutate(first_duration = gaze_duration) %>%
mutate(gaze_duration = if_else(ff_progressive == 0, 0, as.double(gaze_duration)),
go_past_time = if_else(ff_progressive == 0, 0, as.double(go_past_time))) %>%
dplyr::select(-ff_progressive) %>%
mutate(
gaze_duration = if_else(total_duration == 0, 0, as.double(gaze_duration)),
go_past_time = if_else(total_duration == 0, 0, as.double(go_past_time)),
FPReg = if_else(total_duration == 0, -1, as.double(FPReg)),
first_duration = if_else(total_duration == 0, 0, as.double(first_duration)),
) %>%
# drop_na() %>% # will drop the whole row with all the metrics
gather(metric, value, 7:12) %>%
filter(value >= 0) %>% # filter skipped word in eye tracking data for FPReg
# ==== Remove skipped words
# mutate(zero = if_else(metric != "FPReg" & value == 0,T, F)) %>%
# filter(zero == F) %>%
# mutate(value = if_else(is.na(value), as.integer(0), as.integer(value))) %>%
# mutate(value = if_else(metric != "FPReg" & is.na(value), as.integer(0), as.integer(value))) %>%
drop_na() %>%
mutate(word = str_trim(word)) %>%
mutate(subj = str_remove(subj, "Sub")) %>%
mutate(subj = as.integer(subj)) %>%
group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
mutate(outlier = if_else(metric != "FPReg" & metric != "skip" & value > (mean(value) + 3 * sd(value) ), T, F)) %>%
filter(outlier == F) %>%
ungroup() #%>%
# Aggregate cross-participant data for all subjects
provo_eyetracking_agg_df = provo_eyetracking_df %>%
group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
summarise(value = mean(value),
nsubj = length(unique(subj))) %>%
ungroup()
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
provo_raw_df %>%
dplyr::select(IA_REGRESSION_OUT) %>%
drop_na() %>%
summarise( m = mean(IA_REGRESSION_OUT))
provo_raw_df %>%
dplyr::select(IA_SKIP) %>%
drop_na() %>%
summarise( m = mean(IA_SKIP))
NA
NA
# Split the eyetracking data in two by subjects to see how well it correlates with itself
provo_eyetracking_subj1_df_temp = provo_eyetracking_df %>%
filter(subj <= 42) %>%
mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
summarise(value = mean(value)) %>%
ungroup() %>%
rename(value_1 = value) #%>%
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
# dplyr::select(-sent_id, -word_idx)
provo_eyetracking_subj1_df = merge(provo_eyetracking_subj1_df_temp, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
arrange(text_id, sent_id, word_idx) %>%
filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
rename(word = word.y) %>%
dplyr::select(text_id, word_text_idx, metric, word, value_1)
# View(provo_eyetracking_subj1_df)
provo_eyetracking_subj2_df = provo_eyetracking_df %>%
filter(subj > 42) %>%
mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
summarise(value = mean(value)) %>%
ungroup() %>%
rename(value_2 = value)%>%
dplyr::select(-sent_id, -word_idx)
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
# View(provo_eyetracking_subj2_df)
provo_eyetr_grouped_df = merge(provo_eyetracking_subj2_df, provo_eyetracking_subj1_df, by=c("text_id", "word_text_idx", "metric")) %>%
# filter(word.x == word.y) %>%
dplyr::select(-word.y) %>%
group_by(metric) %>%
mutate(motr_outlier = if_else(metric != "FPReg" & metric != "skip" & value_1 > (mean(value_1) + 3 * sd(value_1) ), T, F)) %>%
filter(motr_outlier == F) %>%
mutate(eyetr_outlier = if_else(metric != "FPReg" & metric != "skip" & value_2 > (mean(value_2) + 3 * sd(value_2) ), T, F)) %>%
filter(eyetr_outlier == F) %>%
ungroup() %>%
gather(measure, value, c("value_1", "value_2")) %>%
dplyr::select(-motr_outlier, -eyetr_outlier)
# View(provo_eyetr_grouped_df)
provo_df = merge(provo_eyetracking_agg_df, provo_modeling_df, by=c("text_id", "sent_id", "word_idx")) %>%
mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
arrange(text_id, sent_id, word_idx) %>%
rename(eyetr_value = value)
provo_df = merge(provo_df, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
arrange(text_id, sent_id, word_idx) %>%
# almost all the word.x != word.y is because of normalization problem, so we can keep them, instead, deleting some special cases
filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
# filter(word.x == word) #%>%
dplyr::select(-word.x, -word.y) %>%
group_by(metric) %>%
mutate(motr_outlier = if_else(metric != "FPReg" & motr_value > (mean(motr_value) + 3 * sd(motr_value) ), T, F)) %>%
filter(motr_outlier == F) %>%
mutate(eyetr_outlier = if_else(metric != "FPReg" & eyetr_value > (mean(eyetr_value) + 3 * sd(eyetr_value) ), T, F)) %>%
filter(eyetr_outlier == F) %>%
ungroup() %>%
gather(measure, value, c("eyetr_value", "motr_value")) %>%
dplyr::select(-motr_outlier, -eyetr_outlier)
# provo_df
Bayesian – use Stan – motr & eyetr correlation
print("Gaze Duration")
[1] "Gaze Duration"
gd_df = provo_df %>% filter(metric == "gaze_duration") %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value = pmax(eyetr_value, 1),
motr_value = pmax(motr_value, 1)
) %>%
mutate(eyetr_value_log = log(eyetr_value),
motr_value_log = log(motr_value))
print(cor.test(gd_df$eyetr_value, gd_df$motr_value)$estimate)
cor
0.7874
print(cor.test(gd_df$eyetr_value_log, gd_df$motr_value_log)$estimate)
cor
0.6055
# View(gd_df)
gd_df %>%
gather(measure, value, 12:15) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")

NA
# center data around 0.
gd_temp <- gd_df[c("eyetr_value", "motr_value")] %>%
# mutate(eyetr_value = eyetr_value - mean(eyetr_value),
# motr_value = motr_value - mean(motr_value)) %>%
data.matrix()
gd_temp_log <- gd_df[c("eyetr_value_log", "motr_value_log")] %>%
mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log),
motr_value_log = motr_value_log - mean(motr_value_log)) %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(gd_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")
# Plot the second data matrix gd_temp_log
plot(gd_temp_log, pch = 16, col = "red",
main = "Centered Log-Transformed")

gd_data = list(x=gd_temp, N=nrow(gd_temp))
fit_gd = stan(
file="stan_models/bivariate_correlation.stan",
data=gd_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_gd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gd, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds"))
print("Go Past Time")
[1] "Go Past Time"
gpt_df = provo_df %>% filter(metric == "go_past_time") %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value = pmax(eyetr_value, 1),
motr_value = pmax(motr_value, 1)
) %>%
mutate(eyetr_value_log = log(eyetr_value),
motr_value_log = log(motr_value))
print(cor.test(gpt_df$eyetr_value, gpt_df$motr_value)$estimate)
cor
0.7292
print(cor.test(gpt_df$eyetr_value_log, gpt_df$motr_value_log)$estimate)
cor
0.5889
gpt_df %>%
gather(measure, value, 12:15) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
gpt_temp <- gpt_df[c("eyetr_value", "motr_value")] %>% data.matrix()
gpt_temp_log <- gpt_df[c("eyetr_value_log", "motr_value_log")] %>%
mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log),
motr_value_log = motr_value_log - mean(motr_value_log)) %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gpt_temp
plot(gpt_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")
# Plot the second data matrix gpt_temp_log
plot(gpt_temp_log, pch = 16, col = "red",
main = "Centered Log-Transformed")

# -------fit model go past time ----------
gpt_data = list(x=gpt_temp, N=nrow(gpt_temp))
fit_gpt = stan(
file="stan_models/bivariate_correlation.stan",
data=gpt_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_gpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gpt, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds"))
print("Total Duration")
[1] "Total Duration"
td_df = provo_df %>% filter(metric == "total_duration") %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value = pmax(eyetr_value, 1),
motr_value = pmax(motr_value, 1)
) %>%
mutate(eyetr_value_log = log(eyetr_value),
motr_value_log = log(motr_value))
print(cor.test(td_df$eyetr_value, td_df$motr_value)$estimate)
cor
0.7601
print(cor.test(td_df$eyetr_value_log, td_df$motr_value_log)$estimate)
cor
0.6421
td_df %>%
gather(measure, value, 12:15) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
td_temp <- td_df[c("eyetr_value", "motr_value")] %>% data.matrix()
td_temp_log <- td_df[c("eyetr_value_log", "motr_value_log")] %>%
mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log),
motr_value_log = motr_value_log - mean(motr_value_log)) %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix td_temp
plot(td_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")
# Plot the second data matrix td_temp_log
plot(td_temp_log, pch = 16, col = "red",
main = "Centered Log-Transformed")

# -------fit model total duration ----------
td_data = list(x=td_temp, N=nrow(td_temp))
fit_td = stan(
file="stan_models/bivariate_correlation.stan",
data=td_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_td@stanmodel@dso <- new("cxxdso")
saveRDS(fit_td, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds"))
print("First Pass Regression Prob.")
[1] "First Pass Regression Prob."
reg_df = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5))
print(cor.test(reg_df$eyetr_value, reg_df$motr_value)$estimate)
cor
0.3225
# View(reg_df)
reg_df %>%
gather(measure, value, 12:13) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
reg_temp <- reg_df[c("eyetr_value", "motr_value")] %>% data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix td_temp
plot(reg_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")

# -------fit model FPReg ----------
reg_data = list(x=reg_temp, N=nrow(reg_temp))
fit_reg = stan(
file="stan_models/bivariate_normal_reg.stan",
data=reg_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor.rds"))
# models with all 0s
fit_gd = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor.rds")
fit_gpt = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor.rds")
fit_td = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor.rds")
fit_reg = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor.rds")
# models for drop 0s
# fit_gd = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds")
# fit_gpt = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds")
# fit_td = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds")
# fit_reg = readRDS("./bayesian_models/bayesian_models_correlation/ranked_motr_eyetr_FPReg_cor.rds")
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
print(fit_gd)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 131.55 0.02 1.46 128.66 130.56 131.55 132.54 134.38 5271 1
mu[2] 213.43 0.04 2.74 208.13 211.57 213.40 215.28 218.88 5555 1
sigma[1] 70.09 0.01 1.04 68.09 69.39 70.07 70.78 72.17 5098 1
sigma[2] 130.80 0.03 1.96 127.05 129.45 130.81 132.10 134.65 4929 1
nu 54.25 0.19 15.25 31.07 43.24 51.93 62.89 90.26 6732 1
rho 0.79 0.00 0.01 0.78 0.79 0.79 0.80 0.81 5263 1
cov[1,1] 4913.42 2.05 146.49 4636.86 4814.49 4909.58 5010.11 5207.93 5102 1
cov[1,2] 7255.71 3.72 241.40 6793.44 7092.28 7254.29 7414.55 7739.22 4201 1
cov[2,1] 7255.71 3.72 241.40 6793.44 7092.28 7254.29 7414.55 7739.22 4201 1
cov[2,2] 17113.69 7.32 513.63 16142.03 16758.30 17110.41 17451.39 18131.15 4929 1
x_rand[1] 140.41 0.72 64.84 23.76 93.88 138.20 183.28 273.84 8002 1
x_rand[2] 233.42 1.33 118.97 28.76 145.37 228.59 312.35 485.69 7985 1
attempt 0.07 0.00 0.27 0.00 0.00 0.00 0.00 1.00 8312 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -25988.72 0.03 1.70 -25992.74 -25989.64 -25988.40 -25987.47 -25986.37 3832 1
Samples were drawn using NUTS(diag_e) at Fri Aug 11 12:08:31 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
print(fit_gpt)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 154.74 0.03 2.04 150.72 153.38 154.72 156.15 158.66 4427 1
mu[2] 219.83 0.05 3.30 213.32 217.62 219.82 222.12 226.18 4679 1
sigma[1] 86.23 0.02 1.59 83.16 85.17 86.21 87.29 89.36 4481 1
sigma[2] 140.41 0.04 2.59 135.27 138.67 140.40 142.12 145.48 4528 1
nu 7.48 0.01 0.68 6.29 7.01 7.43 7.91 8.94 4992 1
rho 0.77 0.00 0.01 0.75 0.76 0.77 0.77 0.79 5877 1
cov[1,1] 7438.72 4.09 273.59 6915.18 7254.05 7432.86 7620.07 7984.43 4482 1
cov[1,2] 9299.88 5.84 377.30 8577.98 9044.57 9292.09 9554.07 10053.55 4172 1
cov[2,1] 9299.88 5.84 377.30 8577.98 9044.57 9292.09 9554.07 10053.55 4172 1
cov[2,2] 19721.18 10.81 727.65 18298.54 19228.50 19713.43 20197.70 21164.45 4535 1
x_rand[1] 172.47 0.99 87.13 25.82 111.76 164.61 224.46 367.21 7717 1
x_rand[2] 250.79 1.58 138.92 29.39 150.75 237.02 333.51 565.33 7718 1
attempt 0.11 0.00 0.36 0.00 0.00 0.00 0.00 1.00 7990 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -27100.13 0.03 1.74 -27104.37 -27101.08 -27099.80 -27098.85 -27097.73 3647 1
Samples were drawn using NUTS(diag_e) at Fri Aug 11 12:20:52 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
print(fit_td)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 179.24 0.03 1.98 175.47 177.92 179.22 180.55 183.12 4792 1
mu[2] 263.66 0.05 3.62 256.71 261.20 263.64 266.10 270.68 4380 1
sigma[1] 89.32 0.02 1.49 86.43 88.30 89.32 90.31 92.28 4916 1
sigma[2] 163.49 0.04 2.76 158.18 161.59 163.45 165.36 169.00 4968 1
nu 18.63 0.05 3.79 12.93 15.97 18.00 20.65 27.82 5111 1
rho 0.77 0.00 0.01 0.76 0.77 0.78 0.78 0.79 5791 1
cov[1,1] 7980.52 3.80 266.89 7470.94 7797.69 7978.25 8155.66 8515.25 4927 1
cov[1,2] 11320.89 6.21 414.97 10526.78 11036.95 11314.00 11594.89 12161.76 4461 1
cov[2,1] 11320.89 6.21 414.97 10526.78 11036.95 11314.00 11594.89 12161.76 4461 1
cov[2,2] 26736.46 12.80 902.86 25019.85 26112.35 26714.39 27344.03 28561.76 4976 1
x_rand[1] 191.67 0.95 84.90 40.17 132.02 186.55 244.73 373.85 8049 1
x_rand[2] 290.88 1.70 152.51 33.10 178.96 280.69 387.99 624.44 8005 1
attempt 0.08 0.00 0.29 0.00 0.00 0.00 0.00 1.00 7786 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -27334.42 0.03 1.75 -27338.56 -27335.34 -27334.10 -27333.14 -27332.05 3439 1
Samples were drawn using NUTS(diag_e) at Fri Aug 11 12:24:09 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.--------------------------------------------"
print(fit_reg)
Inference for Stan model: bivariate_correlation_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
alpha[1] 0.85 0.00 0.02 0.81 0.84 0.85 0.86 0.89 5443 1
alpha[2] 0.15 0.00 0.00 0.14 0.14 0.15 0.15 0.15 7702 1
beta[1] 4.64 0.00 0.14 4.37 4.54 4.64 4.73 4.92 5975 1
beta[2] 2.52 0.00 0.12 2.29 2.44 2.52 2.60 2.76 7877 1
L[1,1] 1.00 NaN 0.00 1.00 1.00 1.00 1.00 1.00 NaN NaN
L[1,2] 0.00 NaN 0.00 0.00 0.00 0.00 0.00 0.00 NaN NaN
L[2,1] 0.45 0.00 0.38 -0.41 0.18 0.52 0.77 0.95 8489 1
L[2,2] 0.78 0.00 0.21 0.31 0.64 0.85 0.96 1.00 6201 1
mu[1] 2.34 0.00 0.05 2.25 2.31 2.34 2.37 2.44 5468 1
mu[2] 1.16 0.00 0.00 1.15 1.16 1.16 1.16 1.16 7701 1
sigma[1] 104.39 0.19 14.73 78.70 93.94 103.18 113.48 136.81 5984 1
sigma[2] 12.55 0.02 1.50 9.91 11.50 12.44 13.51 15.76 7872 1
rho[1,1] 1.00 NaN 0.00 1.00 1.00 1.00 1.00 1.00 NaN NaN
rho[1,2] 0.45 0.00 0.38 -0.41 0.18 0.52 0.77 0.95 8489 1
rho[2,1] 0.45 0.00 0.38 -0.41 0.18 0.52 0.77 0.95 8489 1
rho[2,2] 1.00 0.00 0.00 1.00 1.00 1.00 1.00 1.00 592 1
Sigma[1,1] 11114.32 41.44 3195.37 6193.09 8824.35 10646.59 12878.40 18717.61 5946 1
Sigma[1,2] 583.46 5.69 518.49 -527.26 234.91 654.32 971.41 1433.33 8291 1
Sigma[2,1] 583.46 5.69 518.49 -527.26 234.91 654.32 971.41 1433.33 8291 1
Sigma[2,2] 159.88 0.44 38.81 98.15 132.31 154.69 182.47 248.29 7789 1
x_rand[1] 0.16 0.00 0.14 0.00 0.05 0.12 0.23 0.53 7955 1
x_rand[2] 0.06 0.00 0.12 0.00 0.00 0.00 0.05 0.45 7762 1
lp__ 14798.20 0.03 1.56 14794.21 14797.42 14798.55 14799.33 14800.22 3508 1
Samples were drawn using NUTS(diag_e) at Sat Jul 22 20:39:27 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# stan_trace(fit_gd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_gd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_gd, pars=c("rho", "mu", "sigma", "nu"))
# Gaze Duration
stan_trace(fit_gd)
stan_dens(fit_gd, separate_chains = TRUE)
stan_plot(fit_gd)
# Go Past Time
stan_trace(fit_gpt)
stan_dens(fit_gpt, separate_chains = TRUE)
stan_plot(fit_gpt)
# Total Duration
stan_trace(fit_td)
stan_dens(fit_td, separate_chains = TRUE)
stan_plot(fit_td)
# FPReg
stan_trace(fit_reg)
stan_dens(fit_reg, separate_chains = TRUE)
stan_plot(fit_reg)
p1 <- stan_trace(fit_gd, pars = 'rho', inc_warmup = FALSE)
p2 <- stan_dens(fit_gd, pars = 'rho', separate_chains = TRUE)
p3 <- stan_trace(fit_gd, pars = 'mu[1]', inc_warmup = FALSE)
p4 <- stan_dens(fit_gd, pars = 'mu[1]', separate_chains = TRUE)
p5 <- stan_trace(fit_gd, pars = 'mu[2]', inc_warmup = FALSE)
p6 <- stan_dens(fit_gd, pars = 'mu[2]', separate_chains = TRUE)
p7 <- stan_trace(fit_gd, pars = 'sigma[1]', inc_warmup = FALSE)
p8 <- stan_dens(fit_gd, pars = 'sigma[1]', separate_chains = TRUE)
p9 <- stan_trace(fit_gd, pars = 'sigma[2]', inc_warmup = FALSE)
p10 <- stan_dens(fit_gd, pars = 'sigma[2]', separate_chains = TRUE)
p11 <- stan_trace(fit_gd, pars = 'nu', inc_warmup = FALSE)
p12 <- stan_dens(fit_gd, pars = 'nu', separate_chains = TRUE)
# Use grid.arrange() to arrange the plots
# grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, ncol=2, nrow=6)
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
rho_gd = as.numeric(extract(fit_gd, "rho")[[1]])
mean = mean(rho_gd)
crI = quantile(rho_gd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_gd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.7912
HPD: [0.7764, 0.8057]
crI: [0.776, 0.8054]
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
rho_gpt = as.numeric(extract(fit_gpt, "rho")[[1]])
mean = mean(rho_gpt)
crI = quantile(rho_gpt, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_gpt), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.7678
HPD: [0.7495, 0.7858]
crI: [0.7492, 0.7856]
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
rho_td = as.numeric(extract(fit_td, "rho")[[1]])
mean = mean(rho_td)
crI = quantile(rho_td, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_td), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.775
HPD: [0.7582, 0.7917]
crI: [0.7579, 0.7916]
print('---------------------------- First Pass Regression --------------------------------------------')
[1] "---------------------------- First Pass Regression --------------------------------------------"
# rho_reg = as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
rho_reg = as.numeric(extract(fit_reg, "rho")[[1]])
mean = mean(rho_reg)
crI = quantile(rho_reg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_reg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
Mean: 0.7228
HPD: [-0.1241, 1]
crI: [-0.2814, 1]
print('---------------------------- Gaze Duration--------------------------------------------')
gd_rand <- extract(fit_gd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(gd_rand[,1], gd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(gd_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(gd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(gd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- Go Past Time--------------------------------------------')
gpt_rand <- extract(fit_gpt, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "Go Past Time") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(gpt_rand[,1], gpt_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(gpt_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(gpt_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(gpt_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- Total Duration--------------------------------------------')
td_rand <- extract(fit_td, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "Total Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(td_rand[,1], td_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(td_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(td_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(td_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression --------------------------------------------')
reg_rand <- extract(fit_reg, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(reg_rand[,1], reg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(reg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(reg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
model motr eyetr FPReg correlation (eyetr < 0.3)
print("First Pass Regression Prob. all and < 0.3")
[1] "First Pass Regression Prob. all and < 0.3"
reg_df_all = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
# filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5))
reg_df_low_drop0 = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5)) %>%
filter(eyetr_value < 0.3)
reg_df_low = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
# filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5)) %>%
filter(eyetr_value < 0.3)
# mutate(eyetr_value = exp(eyetr_value),
# motr_value = exp(motr_value)
# )
# View(reg_df)
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$estimate)
cor
0.2454
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$p.value)
[1] 0.000000000000000000000000000000000004731
print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$estimate)
cor
0.09488
print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$p.value)
[1] 0.000006321
print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$estimate)
cor
0.05585
print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$p.value)
[1] 0.1193
# View(reg_df)
reg_df_low %>%
gather(measure, value, 12:13) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
reg_temp_all <- reg_df_all[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low <- reg_df_low[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low_drop0 <- reg_df_low_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix td_temp
plot(reg_temp_low, pch = 16, col = "blue",
main = "Not Log-Transformed")

# -------fit model FPReg < 0.3 ----------
reg_data = list(x=reg_temp_all, N=nrow(reg_temp_all))
fit_reg = stan(
# file="stan_models/bivariate_beta_correlation_reg.stan",
file = "stan_models/bivariate_normal_reg.stan",
data=reg_data,
iter=4000,
chains=4,
cores=4,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_all_data_drop0s.rds"))
model motr eyetr FPReg correlation (eyetr >= 0.3)
print("First Pass Regression Prob. >= 0.3")
[1] "First Pass Regression Prob. >= 0.3"
reg_df_high_drop0 = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5)) %>%
filter(eyetr_value >= 0.3)
reg_df_high = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
# filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5)) %>%
filter(eyetr_value >= 0.3)
# mutate(eyetr_value = exp(eyetr_value),
# motr_value = exp(motr_value)
# )
# View(reg_df)
print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$estimate)
cor
0.3952
print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$p.value)
[1] 0.000000000009391
print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$estimate)
cor
0.489
print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$p.value)
[1] 0.000000001007
# View(reg_df)
reg_df_high %>%
gather(measure, value, 12:13) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
reg_temp_high <- reg_df_high[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_high_drop0 <- reg_df_high_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 3))

# Plot the first data matrix td_temp
plot(reg_temp_all, pch = 16, col = "blue",
main = "FPReg not logged all data")
plot(reg_temp_low, pch = 16, col = "blue",
main = "FPReg not logged eyetr < 0.3 ")
plot(reg_temp_high, pch = 16, col = "blue",
main = "FPReg not logged eyetr >= 0.3")

# -------fit model FPReg >= 0.3 ----------
reg_data = list(x=reg_temp_high_drop0, N=nrow(reg_temp_high_drop0))
fit_reg = stan(
# file="stan_models/bivariate_beta_correlation_reg.stan",
file = "stan_models/bivariate_normal_reg.stan",
data=reg_data,
iter=4000,
chains=4,
cores=4,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0(".//bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_03-1_drop0s.rds"))
fit_mreg_all = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_all_data.rds")
fit_mreg_all_drop0 = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_all_data_drop0s.rds")
fit_mreg_low = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_00-03.rds")
fit_mreg_low_drop0 = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_00-03_drop0s.rds")
fit_mreg_high = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_03-1.rds")
fit_mreg_high_drop0 = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_03-1_drop0s.rds")
print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
[1] "---------------------------- First Pass Regression Prob. all data --------------------------------------------"
print(fit_mreg_all)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.12 0.00 0.00 0.12 0.12 0.12 0.13 0.13 5811 1
mu[2] 0.03 0.00 0.00 0.02 0.03 0.03 0.03 0.03 3384 1
sigma[1] 0.08 0.00 0.00 0.07 0.07 0.08 0.08 0.08 3694 1
sigma[2] 0.05 0.00 0.00 0.05 0.05 0.05 0.05 0.06 3040 1
nu 2.42 0.00 0.14 2.16 2.33 2.42 2.52 2.71 3250 1
rho 0.16 0.00 0.02 0.11 0.14 0.16 0.17 0.20 8176 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 3693 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6348 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6348 1
cov[2,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 3053 1
x_rand[1] 0.16 0.00 0.12 0.02 0.09 0.14 0.20 0.40 7811 1
x_rand[2] 0.07 0.00 0.08 0.00 0.03 0.05 0.09 0.25 7966 1
attempt 0.63 0.01 1.03 0.00 0.00 0.00 1.00 3.00 7919 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 7450.58 0.03 1.78 7446.22 7449.65 7450.92 7451.89 7452.98 3519 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 23:08:09 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob. all data no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob. all data no 0s--------------------------------------------"
print(fit_mreg_all_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.15 0.00 0.00 0.14 0.15 0.15 0.15 0.16 8507 1
mu[2] 0.14 0.00 0.00 0.13 0.14 0.14 0.14 0.14 7923 1
sigma[1] 0.09 0.00 0.00 0.08 0.08 0.09 0.09 0.09 7186 1
sigma[2] 0.06 0.00 0.00 0.06 0.06 0.06 0.06 0.06 6227 1
nu 2.78 0.00 0.23 2.37 2.62 2.77 2.93 3.26 7008 1
rho 0.18 0.00 0.04 0.11 0.16 0.18 0.21 0.26 9833 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 7173 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8599 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8599 1
cov[2,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6234 1
x_rand[1] 0.18 0.00 0.13 0.02 0.10 0.16 0.22 0.43 7699 1
x_rand[2] 0.15 0.00 0.08 0.03 0.10 0.14 0.18 0.34 8175 1
attempt 0.16 0.00 0.43 0.00 0.00 0.00 0.00 1.00 7917 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 2566.44 0.03 1.75 2562.18 2565.53 2566.76 2567.72 2568.88 3487 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 23:18:11 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------"
print(fit_mreg_low)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.12 0.00 0.00 0.11 0.12 0.12 0.12 0.12 8566 1
mu[2] 0.03 0.00 0.00 0.03 0.03 0.03 0.04 0.04 4981 1
sigma[1] 0.06 0.00 0.00 0.06 0.06 0.06 0.06 0.07 7280 1
sigma[2] 0.06 0.00 0.00 0.06 0.06 0.06 0.06 0.07 4552 1
nu 5.54 0.01 0.51 4.62 5.19 5.51 5.86 6.62 4637 1
rho 0.10 0.00 0.02 0.06 0.09 0.10 0.12 0.15 8596 1
cov[1,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7299 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8259 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8259 1
cov[2,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4571 1
x_rand[1] 0.13 0.00 0.07 0.02 0.08 0.12 0.17 0.28 8277 1
x_rand[2] 0.07 0.00 0.06 0.00 0.03 0.06 0.10 0.21 7280 1
attempt 0.53 0.01 0.90 0.00 0.00 0.00 1.00 3.00 8006 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 7794.82 0.03 1.78 7790.50 7793.88 7795.16 7796.13 7797.26 3232 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 20:52:03 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.< 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.< 0.3 no 0s--------------------------------------------"
print(fit_mreg_low_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.13 0.00 0.00 0.13 0.13 0.13 0.13 0.14 10534 1
mu[2] 0.13 0.00 0.00 0.13 0.13 0.13 0.14 0.14 8839 1
sigma[1] 0.06 0.00 0.00 0.06 0.06 0.06 0.06 0.07 8534 1
sigma[2] 0.06 0.00 0.00 0.06 0.06 0.06 0.07 0.07 7122 1
nu 6.50 0.01 0.96 4.90 5.83 6.40 7.06 8.73 6790 1
rho 0.07 0.00 0.04 0.00 0.05 0.07 0.10 0.15 10356 1
cov[1,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8526 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 10435 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 10435 1
cov[2,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7123 1
x_rand[1] 0.14 0.00 0.07 0.02 0.09 0.13 0.18 0.28 8031 1
x_rand[2] 0.14 0.00 0.07 0.02 0.10 0.14 0.18 0.29 8006 1
attempt 0.08 0.00 0.30 0.00 0.00 0.00 0.00 1.00 7642 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 2737.21 0.03 1.76 2732.81 2736.27 2737.56 2738.50 2739.62 3992 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 20:30:02 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------"
print(fit_mreg_high)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.45 0.00 0.01 0.44 0.45 0.45 0.46 0.47 5045 1
mu[2] 0.15 0.00 0.01 0.13 0.15 0.15 0.16 0.18 4628 1
sigma[1] 0.11 0.00 0.01 0.10 0.11 0.11 0.12 0.12 5622 1
sigma[2] 0.15 0.00 0.01 0.13 0.15 0.15 0.16 0.17 4499 1
nu 25.71 0.18 12.83 9.59 16.53 22.89 31.70 58.03 5109 1
rho 0.41 0.00 0.06 0.30 0.37 0.41 0.45 0.51 7137 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 5625 1
cov[1,2] 0.01 0.00 0.00 0.00 0.01 0.01 0.01 0.01 4842 1
cov[2,1] 0.01 0.00 0.00 0.00 0.01 0.01 0.01 0.01 4842 1
cov[2,2] 0.02 0.00 0.00 0.02 0.02 0.02 0.03 0.03 4553 1
x_rand[1] 0.47 0.00 0.11 0.25 0.39 0.47 0.54 0.70 7964 1
x_rand[2] 0.20 0.00 0.13 0.01 0.10 0.19 0.28 0.49 7829 1
attempt 0.19 0.01 0.47 0.00 0.00 0.00 0.00 1.00 7881 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 579.26 0.03 1.75 575.10 578.35 579.56 580.56 581.68 3529 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 22:25:38 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.>= 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.>= 0.3 no 0s--------------------------------------------"
print(fit_mreg_high_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.48 0.00 0.01 0.46 0.48 0.48 0.49 0.51 6409 1
mu[2] 0.27 0.00 0.01 0.25 0.26 0.27 0.28 0.30 6397 1
sigma[1] 0.12 0.00 0.01 0.11 0.12 0.12 0.13 0.14 6555 1
sigma[2] 0.16 0.00 0.01 0.14 0.15 0.16 0.17 0.18 6281 1
nu 32.85 0.16 15.27 11.84 21.65 30.12 40.80 69.54 8953 1
rho 0.51 0.00 0.07 0.37 0.47 0.52 0.56 0.64 6986 1
cov[1,1] 0.02 0.00 0.00 0.01 0.01 0.02 0.02 0.02 6491 1
cov[1,2] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 5145 1
cov[2,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 5145 1
cov[2,2] 0.03 0.00 0.00 0.02 0.02 0.03 0.03 0.03 6242 1
x_rand[1] 0.49 0.00 0.13 0.25 0.41 0.49 0.57 0.74 8129 1
x_rand[2] 0.29 0.00 0.15 0.04 0.18 0.28 0.38 0.60 7467 1
attempt 0.06 0.00 0.24 0.00 0.00 0.00 0.00 1.00 7913 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 300.28 0.03 1.75 296.05 299.35 300.60 301.58 302.70 3555 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 22:31:08 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# # FPReg all data
# stan_trace(fit_mreg_all)
# stan_dens(fit_mreg_all, separate_chains = TRUE)
# stan_plot(fit_mreg_all)
# stan_trace(fit_mreg_all_drop0)
# stan_dens(fit_mreg_all_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_all_drop0)
# # FPReg < 0.3
# stan_trace(fit_mreg_low)
# stan_dens(fit_mreg_low, separate_chains = TRUE)
# stan_plot(fit_mreg_low)
#
# stan_trace(fit_mreg_low_drop0)
# stan_dens(fit_mreg_low_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_low_drop0)
# FPReg > 0.3
stan_trace(fit_mreg_high)
'pars' not specified. Showing first 10 parameters by default.

stan_dens(fit_mreg_high, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.

stan_plot(fit_mreg_high)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)

stan_trace(fit_mreg_high_drop0)
'pars' not specified. Showing first 10 parameters by default.

stan_dens(fit_mreg_high_drop0, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.

stan_plot(fit_mreg_high_drop0)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)

print('---------------------------- First Pass Regression all data--------------------------------------------')
[1] "---------------------------- First Pass Regression all data--------------------------------------------"
rho_mreg_all = as.numeric(extract(fit_mreg_all, "rho")[[1]])
mean = mean(rho_mreg_all)
crI = quantile(rho_mreg_all, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_all), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.1574
HPD: [0.1139, 0.2017]
crI: [0.1134, 0.2014]
print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression all data no 0s--------------------------------------------"
rho_mreg_all_drop0 = as.numeric(extract(fit_mreg_all_drop0, "rho")[[1]])
mean = mean(rho_mreg_all_drop0)
crI = quantile(rho_mreg_all_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_all_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.1843
HPD: [0.1106, 0.2573]
crI: [0.1114, 0.2581]
print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3--------------------------------------------"
rho_mreg_low = as.numeric(extract(fit_mreg_low, "rho")[[1]])
mean = mean(rho_mreg_low)
crI = quantile(rho_mreg_low, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_low), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.103
HPD: [0.05775, 0.1468]
crI: [0.05805, 0.1475]
print('---------------------------- First Pass Regression < 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3 no 0s--------------------------------------------"
rho_mreg_low_drop0 = as.numeric(extract(fit_mreg_low_drop0, "rho")[[1]])
mean = mean(rho_mreg_low_drop0)
crI = quantile(rho_mreg_low_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_low_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.07362
HPD: [-0.000616, 0.1514]
crI: [-0.003759, 0.1491]
print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3--------------------------------------------"
rho_mreg_high = as.numeric(extract(fit_mreg_high, "rho")[[1]])
mean = mean(rho_mreg_high)
crI = quantile(rho_mreg_high, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_high), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.4102
HPD: [0.3001, 0.5155]
crI: [0.2992, 0.5146]
print('---------------------------- First Pass Regression >= 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3 no 0s--------------------------------------------"
rho_mreg_high_drop0 = as.numeric(extract(fit_mreg_high_drop0, "rho")[[1]])
mean = mean(rho_mreg_high_drop0)
crI = quantile(rho_mreg_high_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_high_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.514
HPD: [0.3797, 0.6456]
crI: [0.3728, 0.6412]
print('---------------------------- First Pass Regression all data --------------------------------------------')
mallreg_rand <- extract(fit_mreg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mallreg_rand[,1], mallreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_all, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mallreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
mallreg_rand_drop0 <- extract(fit_mreg_all_drop0, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mallreg_rand_drop0[,1], mallreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_all, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mallreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
mlowreg_rand <- extract(fit_mreg_low, "x_rand")[[1]]
print(mlowreg_rand)
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mlowreg_rand[,1], mlowreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_low, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mlowreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlowreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression < 0.3 no 0s --------------------------------------------')
mlowreg_rand_drop0 <- extract(fit_mreg_low_drop0, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mlowreg_rand_drop0[,1], mlowreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_low_drop0, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mlowreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlowreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
mhighreg_rand_samples <- extract(fit_mreg_high, "x_rand")[[1]]
# print(mhighreg_rand_samples)
selected_indices <- sample(1:nrow(mhighreg_rand_samples), 900)
mhighreg_rand <- mhighreg_rand_samples[selected_indices, ]
# mhighreg_rand <- extract(fit_mreg_high, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mhighreg_rand[,1], mhighreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_high, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mhighreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mhighreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
# print('---------------------------- First Pass Regression >= 0.3 no 0s --------------------------------------------')
mhighreg_rand_drop0_samples <- extract(fit_mreg_high_drop0, "x_rand")[[1]]
selected_indices <- sample(1:nrow(mhighreg_rand_drop0_samples), 900)
mhighreg_rand_drop0 <- mhighreg_rand_drop0_samples[selected_indices, ]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mhighreg_rand_drop0[,1], mhighreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_high_drop0, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mhighreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mhighreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
model eye tracking to eye tracking correlation
print("Gaze Duration")
[1] "Gaze Duration"
# View(provo_eyetr_grouped_df)
egd_df = provo_eyetr_grouped_df %>% filter(metric == "gaze_duration") %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1),
eyetr_value_2 = pmax(value_2, 1)
)
print(cor.test(egd_df$eyetr_value_1, egd_df$eyetr_value_2)$estimate)
cor
0.9146
# View(egd_df)
egd_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
egd_temp <- egd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(egd_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")

egd_data = list(x=egd_temp, N=nrow(egd_temp))
fit_egd = stan(
file="stan_models/bivariate_correlation.stan",
data=egd_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_egd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egd, file = paste0("./bayesian_models/bayesian_models_correlation/cor_bivariate/eyetr_eyetr_gaze_duration_cor.rds"))
print("Go Past Time")
[1] "Go Past Time"
egpt_df = provo_eyetr_grouped_df %>% filter(metric == "go_past_time") %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1),
eyetr_value_2 = pmax(value_2, 1)
)
print(cor.test(egpt_df$eyetr_value_1, egpt_df$eyetr_value_2)$estimate)
cor
0.9134
# View(egd_df)
egpt_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
egpt_temp <- egpt_df[c("eyetr_value_1", "eyetr_value_2")] %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(egpt_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")

egpt_data = list(x=egpt_temp, N=nrow(egpt_temp))
fit_egpt = stan(
file="stan_models/bivariate_correlation.stan",
data=egpt_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_egpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egpt, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_go_past_time_cor.rds"))
print("Total Duration")
[1] "Total Duration"
etd_df = provo_eyetr_grouped_df %>% filter(metric == "total_duration") %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1),
eyetr_value_2 = pmax(value_2, 1)
)
print(cor.test(etd_df$eyetr_value_1, etd_df$eyetr_value_2)$estimate)
cor
0.9272
# View(egd_df)
etd_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
etd_temp <- etd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(etd_temp, pch = 16, col = "blue",
main = "Total Duration Not Log-Transformed")

etd_data = list(x=etd_temp, N=nrow(etd_temp))
fit_etd = stan(
file="stan_models/bivariate_correlation.stan",
data=etd_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Save the model
fit_etd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_etd, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_total_duration_cor.rds"))
print("Fisrt Pass Regression Prob.")
[1] "Fisrt Pass Regression Prob."
ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value)) %>%
filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1e-5),
eyetr_value_2 = pmax(value_2, 1e-5)
)
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)
cor
0.741
# View(egd_df)
ereg_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
drop_na() %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(ereg_temp, pch = 16, col = "blue",
main = "FPReg Not Log-Transformed")

# -------fit model FPReg ----------
# View(ereg_temp)
ereg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_ereg = stan(
file="stan_models/bivariate_normal_reg.stan",
data=ereg_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_ereg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_ereg, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor5.rds"))
fit_egd = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_gaze_duration_cor.rds")
fit_egpt = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_go_past_time_cor.rds")
fit_etd = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_total_duration_cor.rds")
fit_ereg = readRDS("./bayesian_models/bayesian_models_correlation/ranked_eyetr_eyetr_FPReg_cor.rds")
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
print(fit_egd)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 132.90 0.02 1.60 129.78 131.84 132.89 133.94 136.08 4438 1
mu[2] 122.64 0.02 1.41 119.84 121.71 122.66 123.58 125.43 4474 1
sigma[1] 73.79 0.02 1.17 71.50 73.01 73.78 74.56 76.15 3691 1
sigma[2] 65.41 0.02 1.04 63.37 64.74 65.40 66.09 67.51 3824 1
nu 16.48 0.04 2.69 12.28 14.56 16.17 18.01 22.73 4385 1
rho 0.92 0.00 0.00 0.92 0.92 0.92 0.93 0.93 5001 1
cov[1,1] 5446.60 2.85 173.42 5112.56 5330.91 5443.47 5558.73 5798.41 3708 1
cov[1,2] 4456.81 2.40 145.17 4176.71 4361.17 4454.59 4548.67 4748.93 3670 1
cov[2,1] 4456.81 2.40 145.17 4176.71 4361.17 4454.59 4548.67 4748.93 3670 1
cov[2,2] 4279.32 2.20 136.28 4015.83 4191.32 4277.37 4367.26 4557.54 3830 1
x_rand[1] 142.07 0.81 70.30 21.70 91.51 136.29 187.12 297.49 7616 1
x_rand[2] 130.86 0.71 61.94 22.87 86.11 127.74 170.53 262.81 7694 1
attempt 0.06 0.00 0.26 0.00 0.00 0.00 0.00 1.00 8003 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -24130.71 0.03 1.78 -24135.13 -24131.62 -24130.37 -24129.41 -24128.31 3161 1
Samples were drawn using NUTS(diag_e) at Fri Aug 11 12:30:12 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
print(fit_egpt)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 158.88 0.03 2.18 154.63 157.39 158.87 160.38 163.18 4160 1
mu[2] 145.58 0.03 1.91 141.86 144.27 145.57 146.89 149.34 4249 1
sigma[1] 93.78 0.03 1.66 90.59 92.64 93.80 94.90 97.15 3659 1
sigma[2] 82.27 0.03 1.45 79.48 81.26 82.27 83.25 85.15 3180 1
nu 8.83 0.02 0.95 7.21 8.16 8.75 9.42 10.97 3630 1
rho 0.92 0.00 0.00 0.92 0.92 0.92 0.93 0.93 5399 1
cov[1,1] 8797.65 5.14 311.03 8206.42 8582.36 8797.95 9005.48 9437.99 3654 1
cov[1,2] 7124.68 4.37 256.74 6640.23 6943.57 7123.16 7299.14 7643.95 3458 1
cov[2,1] 7124.68 4.37 256.74 6640.23 6943.57 7123.16 7299.14 7643.95 3458 1
cov[2,2] 6770.74 4.24 239.06 6316.85 6603.30 6768.37 6931.34 7249.73 3172 1
x_rand[1] 178.07 1.04 92.17 24.26 112.53 169.84 234.18 379.79 7877 1
x_rand[2] 162.02 0.91 81.68 25.72 103.73 154.98 211.07 340.38 8056 1
attempt 0.08 0.00 0.30 0.00 0.00 0.00 0.00 1.00 8200 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -25277.02 0.03 1.72 -25281.22 -25277.95 -25276.68 -25275.76 -25274.67 3158 1
Samples were drawn using NUTS(diag_e) at Fri Aug 11 12:36:02 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
print(fit_etd)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 185.81 0.03 2.07 181.78 184.42 185.77 187.21 189.91 4202 1
mu[2] 166.28 0.03 1.83 162.73 165.04 166.25 167.50 169.93 4211 1
sigma[1] 94.20 0.03 1.57 91.11 93.15 94.20 95.27 97.30 3447 1
sigma[2] 83.97 0.02 1.39 81.27 83.02 83.96 84.90 86.69 3383 1
nu 12.25 0.02 1.64 9.57 11.09 12.09 13.20 15.94 4615 1
rho 0.94 0.00 0.00 0.93 0.93 0.94 0.94 0.94 5000 1
cov[1,1] 8876.18 5.03 295.39 8301.65 8677.64 8873.28 9075.84 9466.62 3443 1
cov[1,2] 7402.10 4.37 250.04 6915.30 7234.88 7398.57 7570.50 7901.07 3274 1
cov[2,1] 7402.10 4.37 250.04 6915.30 7234.88 7398.57 7570.50 7901.07 3274 1
cov[2,2] 7052.58 4.00 232.76 6605.40 6892.44 7049.39 7208.70 7514.46 3380 1
x_rand[1] 195.43 1.04 92.36 34.49 130.88 189.65 253.14 395.07 7886 1
x_rand[2] 175.22 0.94 82.69 30.41 117.02 170.67 226.08 355.46 7783 1
attempt 0.05 0.00 0.22 0.00 0.00 0.00 0.00 1.00 7894 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -25256.96 0.03 1.72 -25261.06 -25257.90 -25256.64 -25255.69 -25254.56 2928 1
Samples were drawn using NUTS(diag_e) at Fri Aug 11 13:07:03 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.--------------------------------------------"
print(fit_ereg)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 1255.64 0.12 10.80 1234.54 1248.36 1255.74 1262.93 1276.79 8627 1
mu[2] 1252.93 0.12 10.87 1231.37 1245.54 1253.06 1260.17 1274.30 8702 1
sigma[1] 765.79 0.09 8.39 749.70 760.10 765.62 771.49 782.53 9017 1
sigma[2] 770.39 0.09 8.39 754.19 764.66 770.34 775.92 787.09 8416 1
nu 158.18 0.28 29.26 108.58 137.85 155.21 176.28 223.29 10599 1
rho 0.63 0.00 0.01 0.61 0.63 0.63 0.64 0.66 9459 1
cov[1,1] 586505.54 135.48 12860.89 562043.91 577744.85 586178.32 595202.01 612347.58 9011 1
cov[1,2] 373981.79 127.91 11325.06 351915.15 366391.42 373981.76 381542.13 396695.84 7839 1
cov[2,1] 373981.79 127.91 11325.06 351915.15 366391.42 373981.76 381542.13 396695.84 7839 1
cov[2,2] 593566.17 140.99 12933.82 568805.46 584710.34 593422.01 602051.63 619514.48 8415 1
x_rand[1] 1373.55 7.61 682.32 169.56 876.21 1347.83 1820.73 2770.16 8038 1
x_rand[2] 1385.00 7.74 690.70 179.87 876.47 1358.67 1839.94 2853.36 7965 1
attempt 0.09 0.00 0.32 0.00 0.00 0.00 0.00 1.00 7869 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -80031.49 0.03 1.74 -80035.79 -80032.39 -80031.19 -80030.22 -80029.08 3690 1
Samples were drawn using NUTS(diag_e) at Wed Aug 9 22:01:28 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# stan_trace(fit_egd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_egd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_egd, pars=c("rho", "mu", "sigma", "nu"))
# Gaze Duration
stan_trace(fit_egd)
stan_dens(fit_egd, separate_chains = TRUE)
stan_plot(fit_egd)
# Go Past Time
stan_trace(fit_egpt)
stan_dens(fit_egpt, separate_chains = TRUE)
stan_plot(fit_egpt)
# Total Duration
stan_trace(fit_etd)
stan_dens(fit_etd, separate_chains = TRUE)
stan_plot(fit_etd)
# FPReg
stan_trace(fit_ereg)
stan_dens(fit_ereg, separate_chains = TRUE)
stan_plot(fit_ereg)
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
rho_egd = as.numeric(extract(fit_egd, "rho")[[1]])
mean = mean(rho_egd)
crI = quantile(rho_egd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_egd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.9231
HPD: [0.917, 0.9292]
crI: [0.9167, 0.9291]
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
rho_egpt = as.numeric(extract(fit_egpt, "rho")[[1]])
mean = mean(rho_egpt)
crI = quantile(rho_egpt, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_egpt), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.9231
HPD: [0.9169, 0.9294]
crI: [0.9166, 0.9292]
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
rho_etd = as.numeric(extract(fit_etd, "rho")[[1]])
mean = mean(rho_etd)
crI = quantile(rho_etd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_etd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.9355
HPD: [0.9301, 0.9406]
crI: [0.9301, 0.9406]
print('---------------------------- First Pass Regression --------------------------------------------')
[1] "---------------------------- First Pass Regression --------------------------------------------"
rho_ereg = as.numeric(extract(fit_ereg, "rho")[[1]])
mean = mean(rho_ereg)
crI = quantile(rho_ereg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
Mean: 0.6338
HPD: [0.6095, 0.6552]
crI: [0.61, 0.6558]
print('---------------------------- Gaze Duration--------------------------------------------')
egd_rand <- extract(fit_egd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(egd_rand[,1], egd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(egd_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(egd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(egd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- Go Past Time--------------------------------------------')
egpt_rand <- extract(fit_egpt, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Go Past Time") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(egpt_rand[,1], egpt_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(egpt_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(egpt_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(egpt_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- Total Duration--------------------------------------------')
etd_rand <- extract(fit_etd, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Total Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(etd_rand[,1], etd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(etd_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(etd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(etd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression --------------------------------------------')
ereg_rand <- extract(fit_ereg, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "First Pass Regression") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(ereg_rand[,1], ereg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(ereg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(ereg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
Bayesian – correlation between MoTR and word level statistics
print("Log Frequency")
[1] "Log Frequency"
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$freq)$estimate)
cor
-0.7454
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$freq)$estimate)
cor
-0.8069
# View(stats_cor_df)
stats_cor_df %>%
gather(measure, value, c(7, 13)) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
mfreq_temp <- stats_cor_df[c("motr_value", "freq")] %>%
data.matrix()
efreq_temp <- stats_cor_df[c("eyetr_value", "freq")] %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(mfreq_temp, pch = 16, col = "blue",
main = "MoTR RTs and Word Frequency")
# Plot the first data matrix gd_temp
plot(efreq_temp, pch = 16, col = "blue",
main = "EyeTR RTs and Word Frequency")

motr & frequency
mfreq_data = list(x=mfreq_temp, N=nrow(mfreq_temp))
fit_mfreq = stan(
file="stan_models/stats_correlation.stan",
data=mfreq_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Save the model
fit_mfreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mfreq, file = paste0("./bayesian_models/bayesian_models_correlation/motr_freq_cor.rds"))
eyetr & frequency
efreq_data = list(x=efreq_temp, N=nrow(efreq_temp))
fit_efreq = stan(
file="stan_models/stats_correlation.stan",
data=efreq_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Save the model
fit_efreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_efreq, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_freq_cor.rds"))
print("Length")
[1] "Length"
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$len)$estimate)
cor
0.8644
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$len)$estimate)
cor
0.8597
# View(stats_cor_df)
stats_cor_df %>%
gather(measure, value, c(9, 13)) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
mlen_temp <- stats_cor_df[c("motr_value", "len")] %>%
data.matrix()
elen_temp <- stats_cor_df[c("eyetr_value", "len")] %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(mlen_temp, pch = 16, col = "blue",
main = "MoTR RTs and Word Length")
# Plot the first data matrix gd_temp
plot(elen_temp, pch = 16, col = "blue",
main = "EyeTR RTs and Word Length")

motr & length
mlen_data = list(x=mlen_temp, N=nrow(mlen_temp))
fit_mlen = stan(
file="stan_models/stats_correlation_len_normal.stan",
data=mlen_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Save the model
fit_mlen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mlen, file = paste0("./bayesian_models/bayesian_models_correlation/motr_len_cor.rds"))
eyetr & length
elen_data = list(x=elen_temp, N=nrow(elen_temp))
fit_elen = stan(
file="stan_models/stats_correlation_len_normal.stan",
data=elen_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Save the model
fit_elen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_elen, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_len_cor.rds"))
print("Surprisal")
[1] "Surprisal"
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$surp)$estimate)
cor
0.4978
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$surp)$estimate)
cor
0.5683
# View(stats_cor_df)
stats_cor_df %>%
gather(measure, value, c(8, 13)) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
msurp_temp <- stats_cor_df[c("motr_value", "surp")] %>%
data.matrix()
esurp_temp <- stats_cor_df[c("eyetr_value", "surp")] %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(msurp_temp, pch = 16, col = "blue",
main = "MoTR RTs and Surprisal")
# Plot the first data matrix gd_temp
plot(esurp_temp, pch = 16, col = "blue",
main = "EyeTR RTs and Surprisal")

motr & surprisal
msurp_data = list(x=msurp_temp, N=nrow(msurp_temp))
fit_msurp = stan(
file="stan_models/stats_correlation.stan",
data=msurp_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Save the model
fit_msurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_msurp, file = paste0("./bayesian_models/bayesian_models_correlation/motr_surp_cor.rds"))
eyetr & surprisal
esurp_data = list(x=esurp_temp, N=nrow(esurp_temp))
fit_esurp = stan(
file="stan_models/stats_correlation.stan",
data=esurp_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Save the model
fit_esurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_esurp, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_surp_cor.rds"))
fit_mfreq = readRDS("./bayesian_models/bayesian_models_correlation/motr_freq_cor.rds")
fit_efreq = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_freq_cor.rds")
fit_mlen = readRDS("./bayesian_models/bayesian_models_correlation/motr_len_cor.rds")
fit_elen = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_len_cor.rds")
fit_msurp = readRDS("./bayesian_models/bayesian_models_correlation/motr_surp_cor.rds")
fit_esurp = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_surp_cor.rds")
print('---------------------------- MoTR & Log Frequency --------------------------------------------')
[1] "---------------------------- MoTR & Log Frequency --------------------------------------------"
print(fit_mfreq)
Inference for Stan model: stats_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 206.16 0.02 1.93 202.44 204.86 206.17 207.47 209.88 8522 1
mu[2] 5.80 0.00 0.02 5.77 5.79 5.80 5.81 5.84 8983 1
sigma[1] 138.91 0.02 1.50 136.02 137.87 138.92 139.91 141.86 8278 1
sigma[2] 1.34 0.00 0.01 1.32 1.33 1.34 1.35 1.37 8274 1
nu 99.82 0.24 23.02 62.37 83.46 97.35 113.85 150.17 9319 1
rho -0.76 0.00 0.01 -0.78 -0.77 -0.76 -0.76 -0.75 8377 1
cov[1,1] 19299.02 4.59 417.70 18501.76 19006.99 19298.28 19575.75 20123.93 8268 1
cov[1,2] -141.93 0.04 3.34 -148.59 -144.18 -141.93 -139.65 -135.44 6864 1
cov[2,1] -141.93 0.04 3.34 -148.59 -144.18 -141.93 -139.65 -135.44 6864 1
cov[2,2] 1.80 0.00 0.04 1.73 1.78 1.80 1.83 1.87 8265 1
x_rand[1] 227.30 1.38 121.53 20.78 136.69 221.29 306.64 484.97 7799 1
x_rand[2] 5.66 0.01 1.26 3.09 4.82 5.70 6.53 8.03 8041 1
attempt 0.07 0.00 0.27 0.00 0.00 0.00 0.00 1.00 7926 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -36159.81 0.03 1.73 -36164.02 -36160.74 -36159.48 -36158.53 -36157.45 3770 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 15:18:27 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- EyeTR & Log Frequency --------------------------------------------')
[1] "---------------------------- EyeTR & Log Frequency --------------------------------------------"
print(fit_efreq)
Inference for Stan model: stats_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 129.58 0.01 0.95 127.71 128.96 129.59 130.21 131.45 7967 1
mu[2] 5.79 0.00 0.02 5.75 5.78 5.79 5.80 5.82 8371 1
sigma[1] 73.11 0.01 0.75 71.66 72.60 73.10 73.61 74.61 7440 1
sigma[2] 1.34 0.00 0.01 1.31 1.33 1.34 1.35 1.36 7478 1
nu 91.76 0.24 21.94 56.83 76.18 89.08 104.50 142.84 8551 1
rho -0.82 0.00 0.01 -0.83 -0.82 -0.82 -0.81 -0.81 8039 1
cov[1,1] 5345.65 1.28 110.19 5134.97 5270.29 5343.14 5418.78 5566.32 7432 1
cov[1,2] -80.04 0.02 1.70 -83.43 -81.17 -80.03 -78.89 -76.73 6431 1
cov[2,1] -80.04 0.02 1.70 -83.43 -81.17 -80.03 -78.89 -76.73 6431 1
cov[2,2] 1.79 0.00 0.04 1.72 1.77 1.79 1.81 1.86 7476 1
x_rand[1] 137.10 0.77 67.73 16.30 87.55 134.62 182.36 275.36 7640 1
x_rand[2] 5.68 0.01 1.27 3.13 4.82 5.68 6.55 8.11 7779 1
attempt 0.04 0.00 0.19 0.00 0.00 0.00 0.00 1.00 8076 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -32784.74 0.03 1.72 -32789.05 -32785.64 -32784.43 -32783.48 -32782.37 4038 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 15:44:55 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- MoTR & Length --------------------------------------------')
[1] "---------------------------- MoTR & Length --------------------------------------------"
print(fit_mlen)
Inference for Stan model: stats_correlation_len_normal.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 197.87 0.03 2.04 193.81 196.48 197.88 199.27 201.86 4675 1
mu[2] 4.40 0.00 0.04 4.33 4.38 4.40 4.43 4.48 4776 1
sigma[1] 136.99 0.03 1.75 133.57 135.79 136.99 138.19 140.43 4848 1
sigma[2] 2.51 0.00 0.03 2.45 2.49 2.51 2.53 2.58 4665 1
nu 17.03 0.04 2.84 12.51 15.05 16.64 18.61 23.71 4796 1
rho 0.89 0.00 0.00 0.88 0.88 0.89 0.89 0.89 5940 1
cov[1,1] 18768.69 6.88 479.04 17841.00 18438.93 18766.08 19096.47 19721.77 4845 1
cov[1,2] 305.06 0.12 7.91 289.81 299.65 305.00 310.39 320.87 4341 1
cov[2,1] 305.06 0.12 7.91 289.81 299.65 305.00 310.39 320.87 4341 1
cov[2,2] 6.31 0.00 0.17 5.98 6.19 6.30 6.42 6.64 4664 1
x_rand[1] 219.00 1.39 122.76 20.11 127.61 208.39 297.11 488.02 7751 1
x_rand[2] 5.26 0.03 2.36 1.00 4.00 5.00 7.00 10.00 7861 1
attempt 0.10 0.00 0.33 0.00 0.00 0.00 0.00 1.00 7797 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -38437.19 0.03 1.75 -38441.43 -38438.10 -38436.86 -38435.92 -38434.79 3210 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 19:35:18 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- EyeTR & Length --------------------------------------------')
[1] "---------------------------- EyeTR & Length --------------------------------------------"
print(fit_elen)
Inference for Stan model: stats_correlation_len_normal.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 125.47 0.01 1.05 123.48 124.75 125.46 126.18 127.53 5037 1
mu[2] 4.45 0.00 0.04 4.38 4.43 4.45 4.48 4.52 4809 1
sigma[1] 72.58 0.01 0.88 70.87 71.98 72.57 73.15 74.31 4816 1
sigma[2] 2.49 0.00 0.03 2.42 2.47 2.49 2.51 2.55 4786 1
nu 17.29 0.04 2.69 13.08 15.35 16.97 18.80 23.46 4894 1
rho 0.88 0.00 0.00 0.87 0.88 0.88 0.89 0.89 5800 1
cov[1,1] 5268.20 1.84 127.51 5022.34 5181.60 5266.78 5351.59 5522.02 4819 1
cov[1,2] 159.48 0.06 3.98 151.79 156.77 159.44 162.13 167.48 4232 1
cov[2,1] 159.48 0.06 3.98 151.79 156.77 159.44 162.13 167.48 4232 1
cov[2,2] 6.19 0.00 0.16 5.88 6.08 6.19 6.30 6.51 4790 1
x_rand[1] 135.19 0.75 67.87 18.34 85.18 132.06 179.13 278.49 8296 1
x_rand[2] 5.29 0.03 2.41 1.00 4.00 5.00 7.00 10.00 8133 1
attempt 0.06 0.00 0.24 0.00 0.00 0.00 0.00 1.00 7560 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -35403.80 0.03 1.76 -35408.14 -35404.73 -35403.49 -35402.51 -35401.38 3435 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 19:55:18 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- MoTR & Surprisal --------------------------------------------')
[1] "---------------------------- MoTR & Surprisal --------------------------------------------"
print(fit_msurp)
Inference for Stan model: stats_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 193.29 0.02 2.22 189.00 191.80 193.27 194.83 197.56 7883 1
mu[2] 6.02 0.00 0.08 5.86 5.97 6.02 6.08 6.19 8202 1
sigma[1] 135.56 0.02 1.87 132.03 134.29 135.53 136.83 139.29 6733 1
sigma[2] 5.10 0.00 0.08 4.95 5.05 5.10 5.15 5.25 6738 1
nu 11.72 0.02 1.35 9.45 10.76 11.58 12.56 14.76 6506 1
rho 0.57 0.00 0.01 0.54 0.56 0.57 0.58 0.60 7907 1
cov[1,1] 18380.00 6.19 508.20 17430.66 18032.66 18367.66 18723.49 19401.81 6740 1
cov[1,2] 396.36 0.20 15.81 365.40 385.79 396.30 407.09 427.00 6245 1
cov[2,1] 396.36 0.20 15.81 365.40 385.79 396.30 407.09 427.00 6245 1
cov[2,2] 25.98 0.01 0.78 24.50 25.45 25.98 26.51 27.56 6750 1
x_rand[1] 229.82 1.41 125.86 23.36 137.77 220.32 308.08 504.67 7929 1
x_rand[2] 7.63 0.05 4.51 0.66 4.30 7.12 10.37 17.97 7826 1
attempt 0.21 0.01 0.51 0.00 0.00 0.00 0.00 2.00 7854 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -43211.31 0.03 1.76 -43215.70 -43212.20 -43210.97 -43210.02 -43208.89 3913 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 20:47:52 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
[1] "---------------------------- EyeTR & Surprisal --------------------------------------------"
print(fit_esurp)
Inference for Stan model: stats_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 123.52 0.01 1.17 121.28 122.73 123.52 124.30 125.81 7205 1
mu[2] 6.14 0.00 0.08 5.97 6.08 6.13 6.19 6.30 7266 1
sigma[1] 72.79 0.01 0.95 70.95 72.15 72.77 73.43 74.64 6894 1
sigma[2] 5.16 0.00 0.07 5.01 5.11 5.16 5.21 5.30 5938 1
nu 14.77 0.03 1.97 11.49 13.38 14.56 15.97 19.18 5988 1
rho 0.63 0.00 0.01 0.61 0.62 0.63 0.64 0.66 8585 1
cov[1,1] 5299.28 1.66 138.13 5033.28 5205.63 5295.99 5392.41 5571.18 6894 1
cov[1,2] 237.27 0.11 8.26 221.19 231.62 237.24 242.84 253.80 6152 1
cov[2,1] 237.27 0.11 8.26 221.19 231.62 237.24 242.84 253.80 6152 1
cov[2,2] 26.59 0.01 0.77 25.10 26.07 26.58 27.11 28.14 5952 1
x_rand[1] 141.12 0.80 69.27 22.01 91.25 136.11 184.96 290.51 7508 1
x_rand[2] 7.68 0.05 4.55 0.67 4.31 7.15 10.45 17.96 7265 1
attempt 0.17 0.00 0.45 0.00 0.00 0.00 0.00 1.00 8132 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -40032.38 0.03 1.76 -40036.66 -40033.33 -40032.04 -40031.08 -40029.99 3594 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 21:06:56 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# MoTR & Log Freq
stan_trace(fit_mfreq)
stan_dens(fit_mfreq, separate_chains = TRUE)
stan_plot(fit_mfreq)
# EyeTR & Log Freq
stan_trace(fit_efreq)
stan_dens(fit_efreq, separate_chains = TRUE)
stan_plot(fit_efreq)
# MoTR & Len
stan_trace(fit_mlen)
stan_dens(fit_mlen, separate_chains = TRUE)
stan_plot(fit_mlen)
# EyeTR & Len
stan_trace(fit_elen)
stan_dens(fit_elen, separate_chains = TRUE)
stan_plot(fit_elen)
# MoTR & Surprisal
stan_trace(fit_msurp)
stan_dens(fit_msurp, separate_chains = TRUE)
stan_plot(fit_msurp)
# EyeTR & Surprisal
stan_trace(fit_esurp)
stan_dens(fit_esurp, separate_chains = TRUE)
stan_plot(fit_esurp)
print('---------------------------- MoTR & Log Freq --------------------------------------------')
[1] "---------------------------- MoTR & Log Freq --------------------------------------------"
rho_mfreq = as.numeric(extract(fit_mfreq, "rho")[[1]])
mean = mean(rho_mfreq)
crI = quantile(rho_mfreq, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mfreq), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: -0.7613
HPD: [-0.776, -0.7461]
crI: [-0.7762, -0.7462]
print('---------------------------- EyeTR & Log Freq --------------------------------------------')
[1] "---------------------------- EyeTR & Log Freq --------------------------------------------"
rho_efreq = as.numeric(extract(fit_efreq, "rho")[[1]])
mean = mean(rho_efreq)
crI = quantile(rho_efreq, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_efreq), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: -0.8182
HPD: [-0.8297, -0.8065]
crI: [-0.8295, -0.8062]
print('---------------------------- MoTR & Length --------------------------------------------')
[1] "---------------------------- MoTR & Length --------------------------------------------"
rho_mlen = as.numeric(extract(fit_mlen, "rho")[[1]])
mean = mean(rho_mlen)
crI = quantile(rho_mlen, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mlen), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.8867
HPD: [0.8784, 0.8943]
crI: [0.8784, 0.8943]
print('---------------------------- EyeTR & Length --------------------------------------------')
[1] "---------------------------- EyeTR & Length --------------------------------------------"
rho_elen = as.numeric(extract(fit_elen, "rho")[[1]])
mean = mean(rho_elen)
crI = quantile(rho_elen, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_elen), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.883
HPD: [0.8743, 0.8908]
crI: [0.8746, 0.8911]
print('---------------------------- MoTR & Surprisal --------------------------------------------')
[1] "---------------------------- MoTR & Surprisal --------------------------------------------"
rho_msurp = as.numeric(extract(fit_msurp, "rho")[[1]])
mean = mean(rho_msurp)
crI = quantile(rho_msurp, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_msurp), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.5735
HPD: [0.5447, 0.6033]
crI: [0.5432, 0.602]
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
[1] "---------------------------- EyeTR & Surprisal --------------------------------------------"
rho_esurp = as.numeric(extract(fit_esurp, "rho")[[1]])
mean = mean(rho_esurp)
crI = quantile(rho_esurp, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_esurp), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
Mean: 0.632
HPD: [0.6075, 0.6574]
crI: [0.6066, 0.6569]
print('---------------------------- MoTR & Log Frequency--------------------------------------------')
mfreq_rand <- extract(fit_mfreq, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 12), type="n",
xlab = "MoTR value", ylab = "Log Frequency", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mfreq_rand[,1], mfreq_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(mfreq_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mfreq_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mfreq_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- EyeTR & Log Frequency--------------------------------------------')
efreq_rand <- extract(fit_efreq, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 500), ylim=c(0, 12), type="n",
xlab = "Eye tracking value", ylab = "Log Frequency", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(efreq_rand[,1], efreq_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(efreq_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(efreq_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(efreq_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- MoTR & Length --------------------------------------------')
mlen_rand <- extract(fit_mlen, "x_rand")[[1]]
# mlen_rand
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
xlab = "MoTR value", ylab = "Word Length", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mlen_rand[,1], mlen_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(mlen_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mlen_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlen_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- EyeTR & Length --------------------------------------------')
elen_rand <- extract(fit_elen, "x_rand")[[1]]
# elen_rand
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
xlab = "EyeTR value", ylab = "Word Length", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(elen_rand[,1], elen_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(elen_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(elen_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(elen_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- MoTR & Surprisal --------------------------------------------')
msurp_rand <- extract(fit_msurp, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
xlab = "MoTR value", ylab = "Word Surprisal", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(msurp_rand [,1], msurp_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(msurp_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(msurp_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(msurp_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
esurp_rand <- extract(fit_esurp, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
xlab = "EyeTR value", ylab = "Word Surprisal", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(esurp_rand [,1], esurp_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(esurp_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(esurp_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(esurp_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print("EyeTR vs. EyeTR Fisrt Pass Regression Prob. < 0.3 ")
[1] "EyeTR vs. EyeTR Fisrt Pass Regression Prob. < 0.3 "
ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value)) %>%
filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1e-5),
eyetr_value_2 = pmax(value_2, 1e-5))
ereg_df_low = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value)) %>%
filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1e-5),
eyetr_value_2 = pmax(value_2, 1e-5)) %>%
filter(eyetr_value_1 < 0.3)
# View(ereg_df_low)
ereg_df_high = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value)) %>%
filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1e-5),
eyetr_value_2 = pmax(value_2, 1e-5)) %>%
filter(eyetr_value_1 >= 0.3)
# View(ereg_df_high)
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)
cor
0.741
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$p.value)
[1] 0
print(cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)$estimate)
cor
0.4586
print(cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)$p.value)
[1] 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000293
print(cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)$estimate)
cor
0.6653
print(cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)$p.value)
[1] 0.000000000000000000000000000000000000000002144
# View(egd_df)
ereg_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
drop_na() %>%
data.matrix()
ereg_temp_low <- ereg_df_low[c("eyetr_value_1", "eyetr_value_2")] %>%
drop_na() %>%
data.matrix()
ereg_temp_high <- ereg_df_high[c("eyetr_value_1", "eyetr_value_2")] %>%
drop_na() %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 3))

# Plot the first data matrix gd_temp
plot(ereg_temp, pch = 16, col = "blue",
main = "FPReg all data Not Log-Transformed")
plot(ereg_temp_low, pch = 16, col = "blue",
main = "FPReg < 0.3 Not Log-Transformed")
plot(ereg_temp_high, pch = 16, col = "blue",
main = "FPReg > 0.3 Not Log-Transformed")

# -------fit model eyetr vs. eyetr FPReg <0.3 & >=0.3 ----------
reg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_reg = stan(
# file="stan_models/bivariate_beta_correlation_reg.stan",
file = "stan_models/bivariate_normal_reg.stan",
data=reg_data,
iter=4000,
chains=4,
cores=4,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_all_data.rds"))
exploratory: divide eye tracking regression data into two
parts.
# fit_ereg_all = readRDS("./eyetr_eyetr_FPReg_cor_all_data.rds")
fit_ereg_all = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor.rds")
fit_ereg_low = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_00-03.rds")
fit_ereg_high = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_03-1.rds")
print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
[1] "---------------------------- First Pass Regression Prob. all data --------------------------------------------"
print(fit_ereg_all)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.11 0.00 0.00 0.10 0.10 0.11 0.11 0.11 7352 1
mu[2] 0.11 0.00 0.00 0.10 0.11 0.11 0.11 0.11 6993 1
sigma[1] 0.11 0.00 0.00 0.10 0.11 0.11 0.11 0.11 5584 1
sigma[2] 0.10 0.00 0.00 0.10 0.10 0.10 0.10 0.11 5441 1
nu 3.16 0.00 0.13 2.92 3.07 3.16 3.24 3.42 6221 1
rho 0.73 0.00 0.01 0.71 0.73 0.73 0.74 0.76 7209 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 5581 1
cov[1,2] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 4877 1
cov[2,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 4877 1
cov[2,2] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 5442 1
x_rand[1] 0.17 0.00 0.14 0.01 0.08 0.14 0.22 0.47 7543 1
x_rand[2] 0.17 0.00 0.14 0.01 0.08 0.14 0.21 0.46 8201 1
attempt 0.37 0.01 0.70 0.00 0.00 0.00 1.00 2.00 7941 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 11696.50 0.03 1.76 11692.31 11695.54 11696.82 11697.81 11698.90 3562 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 01:21:18 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------"
print(fit_ereg_low)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.11 0.00 0.00 0.11 0.11 0.11 0.11 0.12 7590 1
mu[2] 0.12 0.00 0.00 0.12 0.12 0.12 0.12 0.12 6828 1
sigma[1] 0.08 0.00 0.00 0.07 0.08 0.08 0.08 0.08 7489 1
sigma[2] 0.09 0.00 0.00 0.08 0.08 0.09 0.09 0.09 6449 1
nu 24.44 0.08 6.00 16.09 20.27 23.37 27.29 39.52 5520 1
rho 0.47 0.00 0.02 0.44 0.46 0.47 0.49 0.51 8485 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 7484 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6928 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6928 1
cov[2,2] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 6439 1
x_rand[1] 0.13 0.00 0.07 0.01 0.08 0.12 0.17 0.28 7848 1
x_rand[2] 0.14 0.00 0.08 0.01 0.08 0.13 0.19 0.31 7951 1
attempt 0.17 0.00 0.44 0.00 0.00 0.00 0.00 1.00 7714 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 7720.01 0.03 1.74 7715.76 7719.08 7720.31 7721.28 7722.38 3981 1
Samples were drawn using NUTS(diag_e) at Sun Aug 6 00:32:16 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------"
print(fit_ereg_high)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.44 0.00 0.01 0.43 0.44 0.44 0.45 0.46 5432 1
mu[2] 0.37 0.00 0.01 0.35 0.36 0.37 0.38 0.39 6127 1
sigma[1] 0.12 0.00 0.01 0.11 0.11 0.12 0.12 0.13 5531 1
sigma[2] 0.16 0.00 0.01 0.15 0.16 0.16 0.17 0.18 5479 1
nu 28.02 0.17 12.90 11.34 18.95 25.20 34.27 60.04 6081 1
rho 0.67 0.00 0.03 0.60 0.65 0.67 0.69 0.73 6652 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 5539 1
cov[1,2] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 4763 1
cov[2,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 4763 1
cov[2,2] 0.03 0.00 0.00 0.02 0.02 0.03 0.03 0.03 5491 1
x_rand[1] 0.44 0.00 0.12 0.21 0.36 0.45 0.53 0.68 7922 1
x_rand[2] 0.38 0.00 0.16 0.08 0.26 0.37 0.48 0.71 8307 1
attempt 0.02 0.00 0.13 0.00 0.00 0.00 0.00 0.00 8124 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 793.27 0.03 1.79 788.73 792.39 793.63 794.56 795.67 3841 1
Samples were drawn using NUTS(diag_e) at Sun Aug 6 00:36:26 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# # FPReg all data
stan_trace(fit_ereg_all)
'pars' not specified. Showing first 10 parameters by default.

stan_dens(fit_ereg_all, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.

stan_plot(fit_ereg_all)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)

# # FPReg < 0.3
stan_trace(fit_ereg_low)
'pars' not specified. Showing first 10 parameters by default.

stan_dens(fit_ereg_low, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.

stan_plot(fit_ereg_low)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)

# FPReg >= 0.3
stan_trace(fit_ereg_high)
'pars' not specified. Showing first 10 parameters by default.

stan_dens(fit_ereg_high, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.

stan_plot(fit_ereg_high)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)

print('---------------------------- First Pass Regression all data--------------------------------------------')
[1] "---------------------------- First Pass Regression all data--------------------------------------------"
rho_ereg_all = as.numeric(extract(fit_ereg_all, "rho")[[1]])
mean = mean(rho_ereg_all)
crI = quantile(rho_ereg_all, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_all), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.7339
HPD: [0.7128, 0.7557]
crI: [0.7119, 0.755]
print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3--------------------------------------------"
rho_ereg_low = as.numeric(extract(fit_ereg_low, "rho")[[1]])
mean = mean(rho_ereg_low)
crI = quantile(rho_ereg_low, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_low), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.4739
HPD: [0.4393, 0.5071]
crI: [0.4395, 0.5073]
print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3--------------------------------------------"
rho_ereg_high = as.numeric(extract(fit_ereg_high, "rho")[[1]])
mean = mean(rho_ereg_high)
crI = quantile(rho_ereg_high, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_high), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.6711
HPD: [0.607, 0.7342]
crI: [0.6033, 0.7315]
print('---------------------------- First Pass Regression all data --------------------------------------------')
[1] "---------------------------- First Pass Regression all data --------------------------------------------"
eallreg_rand <- extract(fit_ereg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(eallreg_rand[,1], eallreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(eallreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(eallreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3 --------------------------------------------"
elowreg_rand <- extract(fit_ereg_low, "x_rand")[[1]]
# print(elowreg_rand)
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(elowreg_rand[,1], elowreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp_low, pch=16, col="red")
# add dataEllipse with color
dataEllipse(elowreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(elowreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3 --------------------------------------------"
ehighreg_rand_samples <- extract(fit_ereg_high, "x_rand")[[1]]
# print(mhighreg_rand_samples)
selected_indices <- sample(1:nrow(ehighreg_rand_samples), 900)
ehighreg_rand <- ehighreg_rand_samples[selected_indices, ]
# mhighreg_rand <- extract(fit_mreg_high, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(ehighreg_rand[,1], ehighreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp_high, pch=16, col="red")
# add dataEllipse with color
dataEllipse(ehighreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(ehighreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

---
title: "Exploratory Analysis for MoTR Reading Data"
output: html_notebook
---

```{r}
shhh <- suppressPackageStartupMessages # It's a library, so shhh!

shhh(library( mgcv ))
shhh(library(dplyr))
shhh(library(ggplot2))
shhh(library(lme4))
shhh(library(tidymv))
shhh(library(gamlss))
shhh(library(gsubfn))
shhh(library(lmerTest))
shhh(library(tidyverse))
shhh(library(boot))
shhh(library(rsample))
shhh(library(plotrix))
shhh(library(ggrepel))
shhh(library(mgcv))

shhh(library(brms))
shhh(library(bayesplot))
shhh(library(patchwork))
shhh(library(MASS))
shhh(library(tidyr))
shhh(library(extraDistr))
shhh(library(purrr))
# For exercises with Stan code
shhh(library(rstan))
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = FALSE)

library(car)
library(coda)
shhh(library(gridExtra))

theme_set(theme_bw())
options(digits=4)
options(scipen=999)
set.seed(444)
pipe_message = function(.data, status) {message(status); .data}

```


# Read in MoTR Data

```{r}

rate = 160

file_prefix = "../data/provo_f160/"
fnames = list.files(path=file_prefix)

df = data.frame()
for (f in fnames) {
  temp = read.csv(paste0(file_prefix, "/", f)) %>%
    mutate(subj = str_remove(f, "_reading_measures.csv"))
  df = rbind(df, temp)
}

# Filter out readers who don't answer the comprehension questions correctly
filter_df = df %>%
  group_by(para_nr, subj) %>% summarise(correct = if_else(unique(correctness) == 1, 1, 0)) %>% ungroup() %>%
  drop_na() %>%
  group_by(subj) %>% summarise(p_correct = mean(correct)) %>% ungroup() %>%
  mutate(p_correct = round(p_correct, digits = 2))

filter_df = filter_df %>% filter(p_correct < 0.8)
filter_list = filter_df$subj
## reader_3:0.70, reader_60:0.79, reader_76:0.72 , reader_256:0.71 , reader_262:0.57 

raw_df = df %>%
  filter(! subj %in% c(filter_list)) %>%
  mutate(word = str_trim(word)) %>%
  mutate(subj = str_remove(subj, "reader_")) %>%
  mutate(subj = as.character(subj)) %>%
  # filter(! subj %in% c("3", "60", "76", "256", "262")) %>% # Explanation for this filtering
  mutate(FPReg = if_else(total_duration == 0, -1, FPReg)) %>% #If the word is skipped we can't say that it wasn't regressed on the first pass. Set to a "NA"
  dplyr::select(expr_id, cond_id, para_nr, word, word_nr, first_duration, total_duration, gaze_duration, go_past_time, FPReg, subj)

length(unique(raw_df$subj))

df %>%
  filter(! subj %in% c(filter_list)) %>%
  filter(FPReg >= 0) %>%
  dplyr::select(FPReg) %>%
  drop_na() %>%
  summarise( m = mean(FPReg))

df %>%
  filter(! subj %in% c(filter_list)) %>%
  dplyr::select(FPFix) %>%
  drop_na() %>%
  summarise( m = mean(FPFix))


```


```{r}
# Average across subjects
motr_agg_df = raw_df %>%
  gather(metric, value, 6:10) %>%
    filter(value >= 0) %>% #Removes the "NA" values for FPReg
  
    # ==== Remove skipped words
    # mutate(zero = if_else(metric != "FPReg" & value == 0,T, F)) %>%
    # filter(zero == F) %>%
  
    drop_na() %>%
    group_by(para_nr, word_nr, word, metric) %>% 
      mutate(outlier = if_else(metric != "FPReg" & value > (mean(value) + 3 * sd(value)), T, F)) %>% filter(outlier == F) %>%
      summarise(value = mean(value), nsubj = length(unique(subj))) %>%
  ungroup() %>%
  arrange(para_nr, word_nr) %>%
  rename(text_id = para_nr, word_text_idx = word_nr, motr_value = value)

```




# Comparison to Provo


```{r}
# Read in Provo surprisal, frequency and length data
provo_modeling_df = read.csv("../data/provo_stats.csv") %>%
  dplyr::select(text_id, sent_id, trigger_idx, word, freq, surp, len) %>%
  rename(word_idx = trigger_idx)

provo_modeling_df

```

```{r}
# Read in Provo eyetracking data

provo_raw_df = read.csv("../data/provo_eyetracking.csv")

```

```{r}

# unique(provo_raw_df$Participant_ID)
# length(unique(provo_raw_df$Participant_ID))

provo_eyetracking_df = provo_raw_df %>%
  dplyr::select(Participant_ID, Text_ID, Sentence_Number, Word_In_Sentence_Number, Word, Word_Number, IA_FIRST_FIX_PROGRESSIVE, IA_FIRST_RUN_DWELL_TIME, IA_DWELL_TIME, IA_REGRESSION_PATH_DURATION, IA_REGRESSION_OUT, IA_SKIP) %>%
  rename( #first_duration = IA_FIRST_FIXATION_DURATION,   
          gaze_duration = IA_FIRST_RUN_DWELL_TIME,
          total_duration = IA_DWELL_TIME,
          go_past_time = IA_REGRESSION_PATH_DURATION,
          subj = Participant_ID,
          text_id = Text_ID,
          sent_id = Sentence_Number,
          word_idx = Word_In_Sentence_Number,
          word_text_idx = Word_Number,   # IA_ID?
          word = Word,      # Word?
          FPReg = IA_REGRESSION_OUT,
          skip = IA_SKIP,
          ff_progressive = IA_FIRST_FIX_PROGRESSIVE) %>%
  mutate(first_duration = gaze_duration) %>%
  mutate(gaze_duration = if_else(ff_progressive == 0, 0, as.double(gaze_duration)),
         go_past_time = if_else(ff_progressive == 0, 0, as.double(go_past_time))) %>%
  dplyr::select(-ff_progressive) %>%
  
  mutate(
    gaze_duration = if_else(total_duration == 0, 0, as.double(gaze_duration)),
      go_past_time = if_else(total_duration == 0, 0, as.double(go_past_time)),
      FPReg = if_else(total_duration == 0, -1, as.double(FPReg)),
      first_duration =  if_else(total_duration == 0, 0, as.double(first_duration)),
  ) %>%
  
  # drop_na() %>%     # will drop the whole row with all the metrics
  gather(metric, value, 7:12) %>%
  filter(value >= 0) %>%          # filter skipped word in eye tracking data for FPReg
  # ==== Remove skipped words
  # mutate(zero = if_else(metric != "FPReg" & value == 0,T, F)) %>%
  # filter(zero == F) %>%
  
  # mutate(value = if_else(is.na(value), as.integer(0), as.integer(value))) %>%
  # mutate(value = if_else(metric != "FPReg" & is.na(value), as.integer(0), as.integer(value))) %>%
  drop_na() %>%
  mutate(word = str_trim(word)) %>%
  mutate(subj = str_remove(subj, "Sub")) %>%
  mutate(subj = as.integer(subj)) %>%
    group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    mutate(outlier = if_else(metric != "FPReg" & metric != "skip" & value > (mean(value) + 3 * sd(value) ), T, F)) %>%
    filter(outlier == F) %>%
  ungroup() #%>%

# Aggregate cross-participant data for all subjects
provo_eyetracking_agg_df = provo_eyetracking_df %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    summarise(value = mean(value),
              nsubj = length(unique(subj))) %>%
    ungroup()

provo_raw_df %>%
  dplyr::select(IA_REGRESSION_OUT) %>%
  drop_na() %>%
  summarise( m = mean(IA_REGRESSION_OUT))

provo_raw_df %>%
  dplyr::select(IA_SKIP) %>%
  drop_na() %>%
  summarise( m = mean(IA_SKIP))


```

```{r}

# Split the eyetracking data in two by subjects to see how well it correlates with itself
provo_eyetracking_subj1_df_temp = provo_eyetracking_df %>%
  filter(subj <= 42) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    summarise(value = mean(value)) %>%
  ungroup() %>%
  rename(value_1 = value) #%>%
  # dplyr::select(-sent_id, -word_idx)


provo_eyetracking_subj1_df = merge(provo_eyetracking_subj1_df_temp, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
  arrange(text_id, sent_id, word_idx) %>%
  filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
  filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
  rename(word = word.y) %>%
  dplyr::select(text_id, word_text_idx, metric, word, value_1)

# View(provo_eyetracking_subj1_df)

provo_eyetracking_subj2_df = provo_eyetracking_df %>%
  filter(subj > 42) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    summarise(value = mean(value)) %>%
  ungroup() %>%
    rename(value_2 = value)%>%
  dplyr::select(-sent_id, -word_idx)

# View(provo_eyetracking_subj2_df)
  
provo_eyetr_grouped_df = merge(provo_eyetracking_subj2_df, provo_eyetracking_subj1_df, by=c("text_id", "word_text_idx", "metric")) %>%
  # filter(word.x == word.y) %>%
  dplyr::select(-word.y) %>%
  group_by(metric) %>%
    mutate(motr_outlier = if_else(metric != "FPReg" & metric != "skip" & value_1 > (mean(value_1) + 3 * sd(value_1) ), T, F)) %>%
    filter(motr_outlier == F) %>%
    mutate(eyetr_outlier = if_else(metric != "FPReg" & metric != "skip" & value_2 > (mean(value_2) + 3 * sd(value_2) ), T, F)) %>%
    filter(eyetr_outlier == F) %>%
  ungroup() %>%
  gather(measure, value, c("value_1", "value_2")) %>%
  dplyr::select(-motr_outlier, -eyetr_outlier)

# View(provo_eyetr_grouped_df)

```


```{r}
provo_df = merge(provo_eyetracking_agg_df, provo_modeling_df, by=c("text_id", "sent_id", "word_idx")) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  arrange(text_id, sent_id, word_idx) %>%
  rename(eyetr_value = value) 

provo_df = merge(provo_df, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
arrange(text_id, sent_id, word_idx) %>%
  # almost all the word.x != word.y is because of normalization problem, so we can keep them, instead, deleting some special cases
filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
  filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
# filter(word.x == word) #%>%
dplyr::select(-word.x, -word.y) %>%
group_by(metric) %>%
  mutate(motr_outlier = if_else(metric != "FPReg" & motr_value > (mean(motr_value) + 3 * sd(motr_value) ), T, F)) %>%
  filter(motr_outlier == F) %>%
  mutate(eyetr_outlier = if_else(metric != "FPReg" & eyetr_value > (mean(eyetr_value) + 3 * sd(eyetr_value) ), T, F)) %>%
  filter(eyetr_outlier == F) %>%
ungroup() %>%
gather(measure, value, c("eyetr_value", "motr_value")) %>%
dplyr::select(-motr_outlier, -eyetr_outlier)
  
# provo_df
```


# Bayesian -- use Stan -- motr & eyetr correlation
```{r}
print("Gaze Duration")
gd_df = provo_df %>% filter(metric == "gaze_duration") %>% 
  spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value =  pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)
  ) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))
print(cor.test(gd_df$eyetr_value, gd_df$motr_value)$estimate)
print(cor.test(gd_df$eyetr_value_log, gd_df$motr_value_log)$estimate)
# View(gd_df)
```


```{r}
gd_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")
  
```



```{r}
# center data around 0.

gd_temp <- gd_df[c("eyetr_value", "motr_value")] %>%
   # mutate(eyetr_value = eyetr_value - mean(eyetr_value),
   #      motr_value = motr_value - mean(motr_value)) %>%
  data.matrix()

gd_temp_log <- gd_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(gd_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Plot the second data matrix gd_temp_log
plot(gd_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")

```



```{r, eval=FALSE}
gd_data = list(x=gd_temp, N=nrow(gd_temp))

fit_gd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=gd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_gd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gd, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds"))

```

```{r}
print("Go Past Time")
gpt_df = provo_df %>% filter(metric == "go_past_time") %>% 
  spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value =  pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)
  ) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))
print(cor.test(gpt_df$eyetr_value, gpt_df$motr_value)$estimate)
print(cor.test(gpt_df$eyetr_value_log, gpt_df$motr_value_log)$estimate)

gpt_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

gpt_temp <- gpt_df[c("eyetr_value", "motr_value")] %>% data.matrix()

gpt_temp_log <- gpt_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gpt_temp
plot(gpt_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Plot the second data matrix gpt_temp_log
plot(gpt_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model go past time ----------
gpt_data = list(x=gpt_temp, N=nrow(gpt_temp))
fit_gpt = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=gpt_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_gpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gpt, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds"))
```


```{r}
print("Total Duration")
td_df = provo_df %>% filter(metric == "total_duration") %>% 
  spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value =  pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)
  ) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))
print(cor.test(td_df$eyetr_value, td_df$motr_value)$estimate)
print(cor.test(td_df$eyetr_value_log, td_df$motr_value_log)$estimate)

td_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

td_temp <- td_df[c("eyetr_value", "motr_value")] %>% data.matrix()

td_temp_log <- td_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix td_temp
plot(td_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Plot the second data matrix td_temp_log
plot(td_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model total duration ----------
td_data = list(x=td_temp, N=nrow(td_temp))
fit_td = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=td_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_td@stanmodel@dso <- new("cxxdso")
saveRDS(fit_td, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds"))
```

```{r}
print("First Pass Regression Prob.")
reg_df = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5))
print(cor.test(reg_df$eyetr_value, reg_df$motr_value)$estimate)

# View(reg_df)
reg_df %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp <- reg_df[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix td_temp
plot(reg_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model FPReg ----------
reg_data = list(x=reg_temp, N=nrow(reg_temp))
fit_reg = stan(
  file="stan_models/bivariate_normal_reg.stan", 
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor.rds"))
```



```{r}
# models with all 0s
fit_gd = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor.rds")
fit_gpt = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor.rds")
fit_td = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor.rds")
fit_reg = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor.rds")

# models for drop 0s
# fit_gd = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds")
# fit_gpt = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds")
# fit_td = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds")
# fit_reg = readRDS("./bayesian_models/bayesian_models_correlation/ranked_motr_eyetr_FPReg_cor.rds")

print('---------------------------- Gaze Duration--------------------------------------------')
print(fit_gd)
print('---------------------------- Go Past Time--------------------------------------------')
print(fit_gpt)
print('---------------------------- Total Duration--------------------------------------------')
print(fit_td)
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
print(fit_reg)

```

```{r, eval=FALSE}
# stan_trace(fit_gd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_gd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_gd, pars=c("rho", "mu", "sigma", "nu"))

# Gaze Duration
stan_trace(fit_gd)
stan_dens(fit_gd, separate_chains = TRUE)
stan_plot(fit_gd)

# Go Past Time
stan_trace(fit_gpt)
stan_dens(fit_gpt, separate_chains = TRUE)
stan_plot(fit_gpt)

# Total Duration
stan_trace(fit_td)
stan_dens(fit_td, separate_chains = TRUE)
stan_plot(fit_td)

# FPReg
stan_trace(fit_reg)
stan_dens(fit_reg, separate_chains = TRUE)
stan_plot(fit_reg)

```

```{r, fig.width=5, fig.height=10, eval=FALSE}
p1 <- stan_trace(fit_gd, pars = 'rho', inc_warmup = FALSE)
p2 <- stan_dens(fit_gd, pars = 'rho', separate_chains = TRUE)
p3 <- stan_trace(fit_gd, pars = 'mu[1]', inc_warmup = FALSE)
p4 <- stan_dens(fit_gd, pars = 'mu[1]', separate_chains = TRUE)
p5 <- stan_trace(fit_gd, pars = 'mu[2]', inc_warmup = FALSE)
p6 <- stan_dens(fit_gd, pars = 'mu[2]', separate_chains = TRUE)
p7 <- stan_trace(fit_gd, pars = 'sigma[1]', inc_warmup = FALSE)
p8 <- stan_dens(fit_gd, pars = 'sigma[1]', separate_chains = TRUE)
p9 <- stan_trace(fit_gd, pars = 'sigma[2]', inc_warmup = FALSE)
p10 <- stan_dens(fit_gd, pars = 'sigma[2]', separate_chains = TRUE)
p11 <- stan_trace(fit_gd, pars = 'nu', inc_warmup = FALSE)
p12 <- stan_dens(fit_gd, pars = 'nu', separate_chains = TRUE)


# Use grid.arrange() to arrange the plots
# grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, ncol=2, nrow=6)

```



```{r}
print('---------------------------- Gaze Duration--------------------------------------------')
rho_gd = as.numeric(extract(fit_gd, "rho")[[1]])
mean = mean(rho_gd)
crI = quantile(rho_gd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_gd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Go Past Time--------------------------------------------')
rho_gpt = as.numeric(extract(fit_gpt, "rho")[[1]])
mean = mean(rho_gpt)
crI = quantile(rho_gpt, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_gpt), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Total Duration--------------------------------------------')
rho_td = as.numeric(extract(fit_td, "rho")[[1]])
mean = mean(rho_td)
crI = quantile(rho_td, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_td), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression --------------------------------------------')
# rho_reg = as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
rho_reg = as.numeric(extract(fit_reg, "rho")[[1]])
mean = mean(rho_reg)
crI = quantile(rho_reg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_reg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
```


```{r, eval=FALSE}
print('---------------------------- Gaze Duration--------------------------------------------')
gd_rand <- extract(fit_gd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(gd_rand[,1], gd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(gd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(gd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(gd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Go Past Time--------------------------------------------')
gpt_rand <- extract(fit_gpt, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Go Past Time") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(gpt_rand[,1], gpt_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(gpt_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(gpt_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(gpt_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Total Duration--------------------------------------------')
td_rand <- extract(fit_td, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Total Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(td_rand[,1], td_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(td_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(td_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(td_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression --------------------------------------------')
reg_rand <- extract(fit_reg, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(reg_rand[,1], reg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(reg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(reg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")


```

# model motr eyetr FPReg correlation (eyetr < 0.3)
```{r}
print("First Pass Regression Prob. all and  < 0.3")
reg_df_all = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5))

reg_df_low_drop0 = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value < 0.3)

reg_df_low = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value < 0.3)
  # mutate(eyetr_value = exp(eyetr_value),
         # motr_value = exp(motr_value)
         # )
# View(reg_df)

print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$estimate)
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$p.value)
print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$estimate)
print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$p.value)
print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$estimate)
print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$p.value)

# View(reg_df)
reg_df_low %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp_all <- reg_df_all[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low <- reg_df_low[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low_drop0 <- reg_df_low_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix td_temp
plot(reg_temp_low, pch = 16, col = "blue",
     main = "Not Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model FPReg < 0.3 ----------
reg_data = list(x=reg_temp_all, N=nrow(reg_temp_all))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_all_data_drop0s.rds"))
```


# model motr eyetr FPReg correlation (eyetr >= 0.3)
```{r}
print("First Pass Regression Prob. >= 0.3")
reg_df_high_drop0 = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value >= 0.3)

reg_df_high = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value >= 0.3)
  # mutate(eyetr_value = exp(eyetr_value),
         # motr_value = exp(motr_value)
         # )
# View(reg_df)

print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$estimate)
print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$p.value)
print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$estimate)
print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$p.value)

# View(reg_df)
reg_df_high %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp_high <- reg_df_high[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_high_drop0 <- reg_df_high_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 3))
# Plot the first data matrix td_temp
plot(reg_temp_all, pch = 16, col = "blue",
     main = "FPReg not logged all data")
plot(reg_temp_low, pch = 16, col = "blue",
     main = "FPReg not logged eyetr < 0.3 ")
plot(reg_temp_high, pch = 16, col = "blue",
     main = "FPReg not logged eyetr >= 0.3")
```

```{r, eval=FALSE}
# -------fit model FPReg >= 0.3 ----------
reg_data = list(x=reg_temp_high_drop0, N=nrow(reg_temp_high_drop0))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0(".//bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_03-1_drop0s.rds"))
```



```{r}
fit_mreg_all = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_all_data.rds")
fit_mreg_all_drop0 = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_all_data_drop0s.rds")
fit_mreg_low = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_00-03.rds")
fit_mreg_low_drop0 = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_00-03_drop0s.rds")
fit_mreg_high = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_03-1.rds")
fit_mreg_high_drop0 = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_03-1_drop0s.rds")

print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
print(fit_mreg_all)
print('---------------------------- First Pass Regression Prob. all data no 0s--------------------------------------------')
print(fit_mreg_all_drop0)
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
print(fit_mreg_low)
print('---------------------------- First Pass Regression Prob.< 0.3 no 0s--------------------------------------------')
print(fit_mreg_low_drop0)
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
print(fit_mreg_high)
print('---------------------------- First Pass Regression Prob.>= 0.3 no 0s--------------------------------------------')
print(fit_mreg_high_drop0)
```

```{r}
# # FPReg all data
# stan_trace(fit_mreg_all)
# stan_dens(fit_mreg_all, separate_chains = TRUE)
# stan_plot(fit_mreg_all)

# stan_trace(fit_mreg_all_drop0)
# stan_dens(fit_mreg_all_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_all_drop0)

# # FPReg < 0.3
# stan_trace(fit_mreg_low)
# stan_dens(fit_mreg_low, separate_chains = TRUE)
# stan_plot(fit_mreg_low)
# 
# stan_trace(fit_mreg_low_drop0)
# stan_dens(fit_mreg_low_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_low_drop0)

# FPReg > 0.3
stan_trace(fit_mreg_high)
stan_dens(fit_mreg_high, separate_chains = TRUE)
stan_plot(fit_mreg_high)

stan_trace(fit_mreg_high_drop0)
stan_dens(fit_mreg_high_drop0, separate_chains = TRUE)
stan_plot(fit_mreg_high_drop0)
```

```{r}
print('---------------------------- First Pass Regression all data--------------------------------------------')
rho_mreg_all = as.numeric(extract(fit_mreg_all, "rho")[[1]])
mean = mean(rho_mreg_all)
crI = quantile(rho_mreg_all, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_all), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
rho_mreg_all_drop0 = as.numeric(extract(fit_mreg_all_drop0, "rho")[[1]])
mean = mean(rho_mreg_all_drop0)
crI = quantile(rho_mreg_all_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_all_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
rho_mreg_low = as.numeric(extract(fit_mreg_low, "rho")[[1]])
mean = mean(rho_mreg_low)
crI = quantile(rho_mreg_low, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_low), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression < 0.3 no 0s--------------------------------------------')
rho_mreg_low_drop0 = as.numeric(extract(fit_mreg_low_drop0, "rho")[[1]])
mean = mean(rho_mreg_low_drop0)
crI = quantile(rho_mreg_low_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_low_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
rho_mreg_high = as.numeric(extract(fit_mreg_high, "rho")[[1]])
mean = mean(rho_mreg_high)
crI = quantile(rho_mreg_high, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_high), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression >= 0.3 no 0s--------------------------------------------')
rho_mreg_high_drop0 = as.numeric(extract(fit_mreg_high_drop0, "rho")[[1]])
mean = mean(rho_mreg_high_drop0)
crI = quantile(rho_mreg_high_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_high_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
```

```{r, eval=FALSE}
print('---------------------------- First Pass Regression all data --------------------------------------------')
mallreg_rand <- extract(fit_mreg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mallreg_rand[,1], mallreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_all, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mallreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
mallreg_rand_drop0 <- extract(fit_mreg_all_drop0, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mallreg_rand_drop0[,1], mallreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_all, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mallreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
mlowreg_rand <- extract(fit_mreg_low, "x_rand")[[1]]
print(mlowreg_rand)
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mlowreg_rand[,1], mlowreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_low, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mlowreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlowreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 no 0s --------------------------------------------')
mlowreg_rand_drop0 <- extract(fit_mreg_low_drop0, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mlowreg_rand_drop0[,1], mlowreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_low_drop0, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mlowreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlowreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
mhighreg_rand_samples <- extract(fit_mreg_high, "x_rand")[[1]]
# print(mhighreg_rand_samples)
selected_indices <- sample(1:nrow(mhighreg_rand_samples), 900)
mhighreg_rand <- mhighreg_rand_samples[selected_indices, ]
# mhighreg_rand <- extract(fit_mreg_high, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mhighreg_rand[,1], mhighreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_high, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mhighreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mhighreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

# print('---------------------------- First Pass Regression >= 0.3 no 0s --------------------------------------------')
mhighreg_rand_drop0_samples <- extract(fit_mreg_high_drop0, "x_rand")[[1]]
selected_indices <- sample(1:nrow(mhighreg_rand_drop0_samples), 900)
mhighreg_rand_drop0 <- mhighreg_rand_drop0_samples[selected_indices, ]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color
points(mhighreg_rand_drop0[,1], mhighreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_high_drop0, pch=16, col="red")

# add dataEllipse with color
dataEllipse(mhighreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mhighreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

```


# model eye tracking to eye tracking correlation
```{r}

print("Gaze Duration")
# View(provo_eyetr_grouped_df)

egd_df = provo_eyetr_grouped_df %>% filter(metric == "gaze_duration") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(egd_df$eyetr_value_1, egd_df$eyetr_value_2)$estimate)

# View(egd_df)

egd_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

egd_temp <- egd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(egd_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")

```

```{r, eval=FALSE}
egd_data = list(x=egd_temp, N=nrow(egd_temp))

fit_egd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=egd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_egd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egd, file = paste0("./bayesian_models/bayesian_models_correlation/cor_bivariate/eyetr_eyetr_gaze_duration_cor.rds"))

```

```{r}
print("Go Past Time")

egpt_df = provo_eyetr_grouped_df %>% filter(metric == "go_past_time") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(egpt_df$eyetr_value_1, egpt_df$eyetr_value_2)$estimate)

# View(egd_df)

egpt_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

egpt_temp <- egpt_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(egpt_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
```

```{r, eval=FALSE}
egpt_data = list(x=egpt_temp, N=nrow(egpt_temp))

fit_egpt = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=egpt_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_egpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egpt, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_go_past_time_cor.rds"))

```


```{r}
print("Total Duration")

etd_df = provo_eyetr_grouped_df %>% filter(metric == "total_duration") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(etd_df$eyetr_value_1, etd_df$eyetr_value_2)$estimate)

# View(egd_df)

etd_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

etd_temp <- etd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(etd_temp, pch = 16, col = "blue",
     main = "Total Duration Not Log-Transformed")
```


```{r, eval=FALSE}
etd_data = list(x=etd_temp, N=nrow(etd_temp))

fit_etd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=etd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_etd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_etd, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_total_duration_cor.rds"))
```


```{r}
print("Fisrt Pass Regression Prob.")

ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5)
  )
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)

# View(egd_df)

ereg_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(ereg_temp, pch = 16, col = "blue",
     main = "FPReg Not Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model FPReg ----------

# View(ereg_temp)
ereg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_ereg = stan(
  file="stan_models/bivariate_normal_reg.stan", 
  data=ereg_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99),
  verbose = FALSE
  )

# Save the model 
fit_ereg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_ereg, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor5.rds"))
```


```{r}
fit_egd = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_gaze_duration_cor.rds")
fit_egpt = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_go_past_time_cor.rds")
fit_etd = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_total_duration_cor.rds")
fit_ereg = readRDS("./bayesian_models/bayesian_models_correlation/ranked_eyetr_eyetr_FPReg_cor.rds")
print('---------------------------- Gaze Duration--------------------------------------------')
print(fit_egd)
print('---------------------------- Go Past Time--------------------------------------------')
print(fit_egpt)
print('---------------------------- Total Duration--------------------------------------------')
print(fit_etd)
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
print(fit_ereg)
```


```{r, eval=FALSE}
# stan_trace(fit_egd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_egd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_egd, pars=c("rho", "mu", "sigma", "nu"))

# Gaze Duration
stan_trace(fit_egd)
stan_dens(fit_egd, separate_chains = TRUE)
stan_plot(fit_egd)

# Go Past Time
stan_trace(fit_egpt)
stan_dens(fit_egpt, separate_chains = TRUE)
stan_plot(fit_egpt)

# Total Duration
stan_trace(fit_etd)
stan_dens(fit_etd, separate_chains = TRUE)
stan_plot(fit_etd)

# FPReg
stan_trace(fit_ereg)
stan_dens(fit_ereg, separate_chains = TRUE)
stan_plot(fit_ereg)
```


```{r}
print('---------------------------- Gaze Duration--------------------------------------------')
rho_egd = as.numeric(extract(fit_egd, "rho")[[1]])
mean = mean(rho_egd)
crI = quantile(rho_egd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_egd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Go Past Time--------------------------------------------')
rho_egpt = as.numeric(extract(fit_egpt, "rho")[[1]])
mean = mean(rho_egpt)
crI = quantile(rho_egpt, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_egpt), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Total Duration--------------------------------------------')
rho_etd = as.numeric(extract(fit_etd, "rho")[[1]])
mean = mean(rho_etd)
crI = quantile(rho_etd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_etd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression --------------------------------------------')
rho_ereg = as.numeric(extract(fit_ereg, "rho")[[1]])
mean = mean(rho_ereg)
crI = quantile(rho_ereg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
```


```{r, eval=FALSE}
print('---------------------------- Gaze Duration--------------------------------------------')
egd_rand <- extract(fit_egd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(egd_rand[,1], egd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(egd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(egd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(egd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Go Past Time--------------------------------------------')
egpt_rand <- extract(fit_egpt, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Go Past Time") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(egpt_rand[,1], egpt_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(egpt_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(egpt_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(egpt_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Total Duration--------------------------------------------')
etd_rand <- extract(fit_etd, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Total Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(etd_rand[,1], etd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(etd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(etd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(etd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression --------------------------------------------')
ereg_rand <- extract(fit_ereg, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "First Pass Regression") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(ereg_rand[,1], ereg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(ereg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(ereg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
```


# Bayesian -- correlation between MoTR and word level statistics
```{r}
print("Log Frequency")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$freq)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$freq)$estimate)

# View(stats_cor_df)
stats_cor_df %>% 
  gather(measure, value, c(7, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

mfreq_temp <- stats_cor_df[c("motr_value", "freq")] %>%
  data.matrix()
efreq_temp <- stats_cor_df[c("eyetr_value", "freq")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(mfreq_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Word Frequency")

# Plot the first data matrix gd_temp
plot(efreq_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Word Frequency")

```
# motr & frequency
```{r, eval=FALSE}
mfreq_data = list(x=mfreq_temp, N=nrow(mfreq_temp))

fit_mfreq = stan(
  file="stan_models/stats_correlation.stan", 
  data=mfreq_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_mfreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mfreq, file = paste0("./bayesian_models/bayesian_models_correlation/motr_freq_cor.rds"))
```

# eyetr & frequency
```{r, eval=FALSE}
efreq_data = list(x=efreq_temp, N=nrow(efreq_temp))

fit_efreq = stan(
  file="stan_models/stats_correlation.stan", 
  data=efreq_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_efreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_efreq, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_freq_cor.rds"))
```

```{r}
print("Length")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$len)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$len)$estimate)

# View(stats_cor_df)
stats_cor_df %>% 
  gather(measure, value, c(9, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

mlen_temp <- stats_cor_df[c("motr_value", "len")] %>%
  data.matrix()
elen_temp <- stats_cor_df[c("eyetr_value", "len")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(mlen_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Word Length")

# Plot the first data matrix gd_temp
plot(elen_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Word Length")
```

# motr & length
```{r, eval=FALSE}
mlen_data = list(x=mlen_temp, N=nrow(mlen_temp))

fit_mlen = stan(
  file="stan_models/stats_correlation_len_normal.stan", 
  data=mlen_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_mlen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mlen, file = paste0("./bayesian_models/bayesian_models_correlation/motr_len_cor.rds"))

```

# eyetr & length
```{r, eval=FALSE}
elen_data = list(x=elen_temp, N=nrow(elen_temp))

fit_elen = stan(
  file="stan_models/stats_correlation_len_normal.stan", 
  data=elen_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_elen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_elen, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_len_cor.rds"))
```


```{r}
print("Surprisal")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$surp)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$surp)$estimate)

# View(stats_cor_df)
stats_cor_df %>% 
  gather(measure, value, c(8, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

msurp_temp <- stats_cor_df[c("motr_value", "surp")] %>%
  data.matrix()
esurp_temp <- stats_cor_df[c("eyetr_value", "surp")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(msurp_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Surprisal")

# Plot the first data matrix gd_temp
plot(esurp_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Surprisal")
```
# motr & surprisal
```{r, eval=FALSE}
msurp_data = list(x=msurp_temp, N=nrow(msurp_temp))

fit_msurp = stan(
  file="stan_models/stats_correlation.stan", 
  data=msurp_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_msurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_msurp, file = paste0("./bayesian_models/bayesian_models_correlation/motr_surp_cor.rds"))

```

# eyetr & surprisal
```{r, eval=FALSE}
esurp_data = list(x=esurp_temp, N=nrow(esurp_temp))

fit_esurp = stan(
  file="stan_models/stats_correlation.stan", 
  data=esurp_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_esurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_esurp, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_surp_cor.rds"))

```


```{r}
fit_mfreq = readRDS("./bayesian_models/bayesian_models_correlation/motr_freq_cor.rds")
fit_efreq = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_freq_cor.rds")
fit_mlen = readRDS("./bayesian_models/bayesian_models_correlation/motr_len_cor.rds")
fit_elen = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_len_cor.rds")
fit_msurp = readRDS("./bayesian_models/bayesian_models_correlation/motr_surp_cor.rds")
fit_esurp = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_surp_cor.rds")

print('---------------------------- MoTR & Log Frequency --------------------------------------------')
print(fit_mfreq)
print('---------------------------- EyeTR & Log Frequency --------------------------------------------')
print(fit_efreq)
print('---------------------------- MoTR & Length --------------------------------------------')
print(fit_mlen)
print('---------------------------- EyeTR & Length --------------------------------------------')
print(fit_elen)
print('---------------------------- MoTR & Surprisal --------------------------------------------')
print(fit_msurp)
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
print(fit_esurp)

```

```{r, eval=FALSE}
# MoTR & Log Freq
stan_trace(fit_mfreq)
stan_dens(fit_mfreq, separate_chains = TRUE)
stan_plot(fit_mfreq)

# EyeTR & Log Freq
stan_trace(fit_efreq)
stan_dens(fit_efreq, separate_chains = TRUE)
stan_plot(fit_efreq)

# MoTR & Len
stan_trace(fit_mlen)
stan_dens(fit_mlen, separate_chains = TRUE)
stan_plot(fit_mlen)

# EyeTR & Len
stan_trace(fit_elen)
stan_dens(fit_elen, separate_chains = TRUE)
stan_plot(fit_elen)

# MoTR & Surprisal
stan_trace(fit_msurp)
stan_dens(fit_msurp, separate_chains = TRUE)
stan_plot(fit_msurp)

# EyeTR & Surprisal
stan_trace(fit_esurp)
stan_dens(fit_esurp, separate_chains = TRUE)
stan_plot(fit_esurp)

```


```{r}
print('---------------------------- MoTR & Log Freq --------------------------------------------')
rho_mfreq = as.numeric(extract(fit_mfreq, "rho")[[1]])
mean = mean(rho_mfreq)
crI = quantile(rho_mfreq, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mfreq), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- EyeTR & Log Freq --------------------------------------------')
rho_efreq = as.numeric(extract(fit_efreq, "rho")[[1]])
mean = mean(rho_efreq)
crI = quantile(rho_efreq, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_efreq), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- MoTR & Length --------------------------------------------')
rho_mlen = as.numeric(extract(fit_mlen, "rho")[[1]])
mean = mean(rho_mlen)
crI = quantile(rho_mlen, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mlen), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- EyeTR & Length --------------------------------------------')
rho_elen = as.numeric(extract(fit_elen, "rho")[[1]])
mean = mean(rho_elen)
crI = quantile(rho_elen, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_elen), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- MoTR & Surprisal --------------------------------------------')
rho_msurp = as.numeric(extract(fit_msurp, "rho")[[1]])
mean = mean(rho_msurp)
crI = quantile(rho_msurp, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_msurp), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- EyeTR & Surprisal --------------------------------------------')
rho_esurp = as.numeric(extract(fit_esurp, "rho")[[1]])
mean = mean(rho_esurp)
crI = quantile(rho_esurp, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_esurp), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")

```



```{r,eval=FALSE}
print('---------------------------- MoTR & Log Frequency--------------------------------------------')
mfreq_rand <- extract(fit_mfreq, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 12), type="n",
     xlab = "MoTR value", ylab = "Log Frequency", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mfreq_rand[,1], mfreq_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(mfreq_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mfreq_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mfreq_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- EyeTR & Log Frequency--------------------------------------------')
efreq_rand <- extract(fit_efreq, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 500), ylim=c(0, 12), type="n",
     xlab = "Eye tracking value", ylab = "Log Frequency", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(efreq_rand[,1], efreq_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(efreq_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(efreq_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(efreq_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- MoTR & Length --------------------------------------------')
mlen_rand <- extract(fit_mlen, "x_rand")[[1]]
# mlen_rand

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "MoTR value", ylab = "Word Length", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mlen_rand[,1], mlen_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(mlen_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mlen_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlen_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- EyeTR & Length --------------------------------------------')
elen_rand <- extract(fit_elen, "x_rand")[[1]]
# elen_rand

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "EyeTR value", ylab = "Word Length", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(elen_rand[,1], elen_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(elen_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(elen_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(elen_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- MoTR & Surprisal --------------------------------------------')
msurp_rand <- extract(fit_msurp, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "MoTR value", ylab = "Word Surprisal", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(msurp_rand [,1], msurp_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(msurp_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(msurp_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(msurp_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- EyeTR & Surprisal --------------------------------------------')
esurp_rand <- extract(fit_esurp, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "EyeTR value", ylab = "Word Surprisal", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(esurp_rand [,1], esurp_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(esurp_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(esurp_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(esurp_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

```


```{r}
print("EyeTR vs. EyeTR Fisrt Pass Regression Prob. < 0.3 ")

ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5))

ereg_df_low = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5)) %>%
  filter(eyetr_value_1 < 0.3)
# View(ereg_df_low)

ereg_df_high = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5)) %>%
  filter(eyetr_value_1 >= 0.3)
# View(ereg_df_high) 

print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$p.value)
print(cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)$estimate)
print(cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)$p.value)
print(cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)$estimate)
print(cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)$p.value)

# View(egd_df)

ereg_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()
ereg_temp_low <- ereg_df_low[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()
ereg_temp_high <- ereg_df_high[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 3))
# Plot the first data matrix gd_temp
plot(ereg_temp, pch = 16, col = "blue",
     main = "FPReg all data Not Log-Transformed")
plot(ereg_temp_low, pch = 16, col = "blue",
     main = "FPReg < 0.3 Not Log-Transformed")
plot(ereg_temp_high, pch = 16, col = "blue",
     main = "FPReg > 0.3 Not Log-Transformed")
```


```{r, eval=FALSE}
# -------fit model eyetr vs. eyetr FPReg <0.3 & >=0.3 ----------
reg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_all_data.rds"))
```



# exploratory: divide eye tracking regression data into two parts.
```{r}
# fit_ereg_all = readRDS("./eyetr_eyetr_FPReg_cor_all_data.rds")
fit_ereg_all = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor.rds")
fit_ereg_low = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_00-03.rds")
fit_ereg_high = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_03-1.rds")

print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
print(fit_ereg_all)
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
print(fit_ereg_low)
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
print(fit_ereg_high)

```


```{r}
# # FPReg all data
stan_trace(fit_ereg_all)
stan_dens(fit_ereg_all, separate_chains = TRUE)
stan_plot(fit_ereg_all)

# # FPReg < 0.3
stan_trace(fit_ereg_low)
stan_dens(fit_ereg_low, separate_chains = TRUE)
stan_plot(fit_ereg_low)

# FPReg >= 0.3
stan_trace(fit_ereg_high)
stan_dens(fit_ereg_high, separate_chains = TRUE)
stan_plot(fit_ereg_high)
```

```{r}
print('---------------------------- First Pass Regression all data--------------------------------------------')
rho_ereg_all = as.numeric(extract(fit_ereg_all, "rho")[[1]])
mean = mean(rho_ereg_all)
crI = quantile(rho_ereg_all, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_all), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")


print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
rho_ereg_low = as.numeric(extract(fit_ereg_low, "rho")[[1]])
mean = mean(rho_ereg_low)
crI = quantile(rho_ereg_low, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_low), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")


print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
rho_ereg_high = as.numeric(extract(fit_ereg_high, "rho")[[1]])
mean = mean(rho_ereg_high)
crI = quantile(rho_ereg_high, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_high), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
```

```{r}
print('---------------------------- First Pass Regression all data --------------------------------------------')
eallreg_rand <- extract(fit_ereg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(eallreg_rand[,1], eallreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(eallreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(eallreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
elowreg_rand <- extract(fit_ereg_low, "x_rand")[[1]]
# print(elowreg_rand)
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(elowreg_rand[,1], elowreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp_low, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(elowreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(elowreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
ehighreg_rand_samples <- extract(fit_ereg_high, "x_rand")[[1]]
# print(mhighreg_rand_samples)
selected_indices <- sample(1:nrow(ehighreg_rand_samples), 900)
ehighreg_rand <- ehighreg_rand_samples[selected_indices, ]
# mhighreg_rand <- extract(fit_mreg_high, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(ehighreg_rand[,1], ehighreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp_high, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(ehighreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(ehighreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

```
